1990 files changed, 57210 insertions, 27270 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index c17cd4bb2290..1b777b960492 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -328,8 +328,6 @@ sysrq.txt
 	- info on the magic SysRq key.
 telephony/
 	- directory with info on telephony (e.g. voice over IP) support.
-uml/
-	- directory with information about User Mode Linux.
 unicode.txt
 	- info on the Unicode character/font mapping used in Linux.
 unshare.txt
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 7564e88bfa43..e7be75b96e4b 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -183,21 +183,21 @@ Description:	Discover and change clock speed of CPUs
 		to learn how to control the knobs.
 
 
-What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
-Date:      August 2008
+What:		/sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
+Date:		August 2008
 KernelVersion:	2.6.27
-Contact:	mark.langsdorf@amd.com
-Description:	These files exist in every cpu's cache index directories.
-		There are currently 2 cache_disable_# files in each
-		directory.  Reading from these files on a supported
-		processor will return that cache disable index value
-		for that processor and node.  Writing to one of these
-		files will cause the specificed cache index to be disabled.
-
-		Currently, only AMD Family 10h Processors support cache index
-		disable, and only for their L3 caches.  See the BIOS and
-		Kernel Developer's Guide at
-		http://support.amd.com/us/Embedded_TechDocs/31116-Public-GH-BKDG_3-28_5-28-09.pdf	
-		for formatting information and other details on the
-		cache index disable.
-Users:    joachim.deguara@amd.com
+Contact:	discuss@x86-64.org
+Description:	Disable L3 cache indices
+
+		These files exist in every CPU's cache/index3 directory. Each
+		cache_disable_{0,1} file corresponds to one disable slot which
+		can be used to disable a cache index. Reading from these files
+		on a processor with this functionality will return the currently
+		disabled index for that node. There is one L3 structure per
+		node, or per internal node on MCM machines. Writing a valid
+		index to one of these files will cause the specified cache
+		index to be disabled.
+
+		All AMD processors with L3 caches provide this functionality.
+		For details, see BKDGs at
+		http://developer.amd.com/documentation/guides/Pages/default.aspx
diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi b/Documentation/ABI/testing/sysfs-firmware-dmi
index ba9da9503c23..c78f9ab01e56 100644
--- a/Documentation/ABI/testing/sysfs-firmware-dmi
+++ b/Documentation/ABI/testing/sysfs-firmware-dmi
@@ -14,14 +14,15 @@ Description:
 
 		DMI is structured as a large table of entries, where
 		each entry has a common header indicating the type and
-		length of the entry, as well as 'handle' that is
-		supposed to be unique amongst all entries.
+		length of the entry, as well as a firmware-provided
+		'handle' that is supposed to be unique amongst all
+		entries.
 
 		Some entries are required by the specification, but many
 		others are optional.  In general though, users should
 		never expect to find a specific entry type on their
 		system unless they know for certain what their firmware
-		is doing.  Machine to machine will vary.
+		is doing.  Machine to machine experiences will vary.
 
 		Multiple entries of the same type are allowed.  In order
 		to handle these duplicate entry types, each entry is
@@ -67,25 +68,24 @@ Description:
 			  and the two terminating nul characters.
 		type	: The type of the entry.  This value is the same
 			  as found in the directory name.  It indicates
-			  how the rest of the entry should be
-			  interpreted.
+			  how the rest of the entry should be interpreted.
 		instance: The instance ordinal of the entry for the
 			  given type.  This value is the same as found
 			  in the parent directory name.
-		position: The position of the entry within the entirety
-			  of the entirety.
+		position: The ordinal position (zero-based) of the entry
+			  within the entirety of the DMI entry table.
 
 		=== Entry Specialization ===
 
 		Some entry types may have other information available in
-		sysfs.
+		sysfs.  Not all types are specialized.
 
 		--- Type 15 - System Event Log ---
 
 		This entry allows the firmware to export a log of
 		events the system has taken.  This information is
 		typically backed by nvram, but the implementation
-		details are abstracted by this table.  This entries data
+		details are abstracted by this table.  This entry's data
 		is exported in the directory:
 
 		/sys/firmware/dmi/entries/15-0/system_event_log
diff --git a/Documentation/ABI/testing/sysfs-firmware-gsmi b/Documentation/ABI/testing/sysfs-firmware-gsmi
new file mode 100644
index 000000000000..0faa0aaf4b6a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-gsmi
@@ -0,0 +1,58 @@
+What:		/sys/firmware/gsmi
+Date:		March 2011
+Contact:	Mike Waychison <mikew@google.com>
+Description:
+		Some servers used internally at Google have firmware
+		that provides callback functionality via explicit SMI
+		triggers.  Some of the callbacks are similar to those
+		provided by the EFI runtime services page, but due to
+		historical reasons this different entry-point has been
+		used.
+
+		The gsmi driver implements the kernel's abstraction for
+		these firmware callbacks.  Currently, this functionality
+		is limited to handling the system event log and getting
+		access to EFI-style variables stored in nvram.
+
+		Layout:
+
+		/sys/firmware/gsmi/vars:
+
+			This directory has the same layout (and
+			underlying implementation) as /sys/firmware/efi/vars.
+			See Documentation/ABI/*/sysfs-firmware-efi-vars
+			for more information on how to interact with
+			this structure.
+
+		/sys/firmware/gsmi/append_to_eventlog - write-only:
+
+			This file takes a binary blob and passes it onto
+			the firmware to be timestamped and appended to
+			the system eventlog.  The binary format is
+			interpreted by the firmware and may change from
+			platform to platform.  The only kernel-enforced
+			requirement is that the blob be prefixed with a
+			32bit host-endian type used as part of the
+			firmware call.
+
+		/sys/firmware/gsmi/clear_config - write-only:
+
+			Writing any value to this file will cause the
+			entire firmware configuration to be reset to
+			"factory defaults".  Callers should assume that
+			a reboot is required for the configuration to be
+			cleared.
+
+		/sys/firmware/gsmi/clear_eventlog - write-only:
+
+			This file is used to clear out a portion/the
+			whole of the system event log.  Values written
+			should be values between 1 and 100 inclusive (in
+			ASCII) representing the fraction of the log to
+			clear.  Not all platforms support fractional
+			clearing though, and writes to this file
+			will error out if the firmware doesn't like your
+			submitted fraction.
+
+			Callers should assume that a reboot is needed
+			for this operation to complete.
diff --git a/Documentation/ABI/testing/sysfs-firmware-log b/Documentation/ABI/testing/sysfs-firmware-log
new file mode 100644
index 000000000000..9b58e7c5365f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-log
@@ -0,0 +1,7 @@
+What:		/sys/firmware/log
+Date:		February 2011
+Contact:	Mike Waychison <mikew@google.com>
+Description:
+		The /sys/firmware/log is a binary file that represents a
+		read-only copy of the firmware's log if one is
+		available.
diff --git a/Documentation/ABI/testing/sysfs-kernel-fscaps b/Documentation/ABI/testing/sysfs-kernel-fscaps
new file mode 100644
index 000000000000..50a3033b5e15
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-fscaps
@@ -0,0 +1,8 @@
+What:		/sys/kernel/fscaps
+Date:		February 2011
+KernelVersion:	2.6.38
+Contact:	Ludwig Nussel <ludwig.nussel@suse.de>
+Description:
+		Shows whether file system capabilities are honored
+		when executing a binary
+
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 194ca446ac28..b464d12761ba 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -158,3 +158,17 @@ Description:
 		successful, will make the kernel abort a subsequent transition
 		to a sleep state if any wakeup events are reported after the
 		write has returned.
+
+What:		/sys/power/reserved_size
+Date:		May 2011
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/reserved_size file allows user space to control
+		the amount of memory reserved for allocations made by device
+		drivers during the "device freeze" stage of hibernation.  It can
+		be written a string representing a non-negative integer that
+		will be used as the amount of memory to reserve for allocations
+		made by device drivers' "freeze" callbacks, in bytes.
+
+		Reading from this file will display the current value, which is
+		set to 1 MB by default.
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index 36f63d4a0a06..b638e50cf8f6 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -96,10 +96,10 @@ X!Iinclude/linux/kobject.h
 
   <chapter id="devdrivers">
      <title>Device drivers infrastructure</title>
+     <sect1><title>The Basic Device Driver-Model Structures </title>
+!Iinclude/linux/device.h
+     </sect1>
      <sect1><title>Device Drivers Base</title>
-<!--
-X!Iinclude/linux/device.h
--->
 !Edrivers/base/driver.c
 !Edrivers/base/core.c
 !Edrivers/base/class.c
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
index fb10fd08c05c..b3422341d65c 100644
--- a/Documentation/DocBook/genericirq.tmpl
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -191,8 +191,8 @@
 	<para>
 	Whenever an interrupt triggers, the lowlevel arch code calls into
 	the generic interrupt code by calling desc->handle_irq().
-	This highlevel IRQ handling function only uses desc->chip primitives
-	referenced by the assigned chip descriptor structure.
+	This highlevel IRQ handling function only uses desc->irq_data.chip
+	primitives referenced by the assigned chip descriptor structure.
 	</para>
     </sect1>
     <sect1 id="Highlevel_Driver_API">
@@ -206,11 +206,11 @@
 	  <listitem><para>enable_irq()</para></listitem>
 	  <listitem><para>disable_irq_nosync() (SMP only)</para></listitem>
 	  <listitem><para>synchronize_irq() (SMP only)</para></listitem>
-	  <listitem><para>set_irq_type()</para></listitem>
-	  <listitem><para>set_irq_wake()</para></listitem>
-	  <listitem><para>set_irq_data()</para></listitem>
-	  <listitem><para>set_irq_chip()</para></listitem>
-	  <listitem><para>set_irq_chip_data()</para></listitem>
+	  <listitem><para>irq_set_irq_type()</para></listitem>
+	  <listitem><para>irq_set_irq_wake()</para></listitem>
+	  <listitem><para>irq_set_handler_data()</para></listitem>
+	  <listitem><para>irq_set_chip()</para></listitem>
+	  <listitem><para>irq_set_chip_data()</para></listitem>
           </itemizedlist>
 	  See the autogenerated function documentation for details.
 	</para>
@@ -225,6 +225,8 @@
 	  <listitem><para>handle_fasteoi_irq</para></listitem>
 	  <listitem><para>handle_simple_irq</para></listitem>
 	  <listitem><para>handle_percpu_irq</para></listitem>
+	  <listitem><para>handle_edge_eoi_irq</para></listitem>
+	  <listitem><para>handle_bad_irq</para></listitem>
 	  </itemizedlist>
 	  The interrupt flow handlers (either predefined or architecture
 	  specific) are assigned to specific interrupts by the architecture
@@ -241,13 +243,13 @@
 		<programlisting>
 default_enable(struct irq_data *data)
 {
-	desc->chip->irq_unmask(data);
+	desc->irq_data.chip->irq_unmask(data);
 }
 
 default_disable(struct irq_data *data)
 {
 	if (!delay_disable(data))
-		desc->chip->irq_mask(data);
+		desc->irq_data.chip->irq_mask(data);
 }
 
 default_ack(struct irq_data *data)
@@ -284,9 +286,9 @@ noop(struct irq_data *data))
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-desc->chip->irq_mask();
-handle_IRQ_event(desc->action);
-desc->chip->irq_unmask();
+desc->irq_data.chip->irq_mask_ack();
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_unmask();
 		</programlisting>
 		</para>
 	    </sect3>
@@ -300,8 +302,8 @@ desc->chip->irq_unmask();
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-handle_IRQ_event(desc->action);
-desc->chip->irq_eoi();
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_eoi();
 		</programlisting>
 		</para>
 	    </sect3>
@@ -315,17 +317,17 @@ desc->chip->irq_eoi();
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
 if (desc->status &amp; running) {
-	desc->chip->irq_mask();
+	desc->irq_data.chip->irq_mask_ack();
 	desc->status |= pending | masked;
 	return;
 }
-desc->chip->irq_ack();
+desc->irq_data.chip->irq_ack();
 desc->status |= running;
 do {
 	if (desc->status &amp; masked)
-		desc->chip->irq_unmask();
+		desc->irq_data.chip->irq_unmask();
 	desc->status &amp;= ~pending;
-	handle_IRQ_event(desc->action);
+	handle_irq_event(desc->action);
 } while (status &amp; pending);
 desc->status &amp;= ~running;
 		</programlisting>
@@ -344,7 +346,7 @@ desc->status &amp;= ~running;
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-handle_IRQ_event(desc->action);
+handle_irq_event(desc->action);
 		</programlisting>
 		</para>
    	    </sect3>
@@ -362,12 +364,29 @@ handle_IRQ_event(desc->action);
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-handle_IRQ_event(desc->action);
-if (desc->chip->irq_eoi)
-        desc->chip->irq_eoi();
+if (desc->irq_data.chip->irq_ack)
+	desc->irq_data.chip->irq_ack();
+handle_irq_event(desc->action);
+if (desc->irq_data.chip->irq_eoi)
+        desc->irq_data.chip->irq_eoi();
 		</programlisting>
 		</para>
    	    </sect3>
+	    <sect3 id="EOI_Edge_IRQ_flow_handler">
+	 	<title>EOI Edge IRQ flow handler</title>
+		<para>
+		handle_edge_eoi_irq provides an abomination of the edge
+		handler which is solely used to tame a badly wrecked
+		irq controller on powerpc/cell.
+		</para>
+   	    </sect3>
+	    <sect3 id="BAD_IRQ_flow_handler">
+	 	<title>Bad IRQ flow handler</title>
+		<para>
+		handle_bad_irq is used for spurious interrupts which
+		have no real handler assigned.
+		</para>
+   	    </sect3>
 	</sect2>
 	<sect2 id="Quirks_and_optimizations">
 	<title>Quirks and optimizations</title>
@@ -410,6 +429,7 @@ if (desc->chip->irq_eoi)
 	  <listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem>
 	  <listitem><para>irq_mask()</para></listitem>
 	  <listitem><para>irq_unmask()</para></listitem>
+	  <listitem><para>irq_eoi() - Optional, required for eoi flow handlers</para></listitem>
 	  <listitem><para>irq_retrigger() - Optional</para></listitem>
 	  <listitem><para>irq_set_type() - Optional</para></listitem>
 	  <listitem><para>irq_set_wake() - Optional</para></listitem>
@@ -424,32 +444,24 @@ if (desc->chip->irq_eoi)
   <chapter id="doirq">
      <title>__do_IRQ entry point</title>
      <para>
- 	The original implementation __do_IRQ() is an alternative entry
-	point for all types of interrupts.
+	The original implementation __do_IRQ() was an alternative entry
+	point for all types of interrupts. It no longer exists.
      </para>
      <para>
 	This handler turned out to be not suitable for all
 	interrupt hardware and was therefore reimplemented with split
-	functionality for egde/level/simple/percpu interrupts. This is not
+	functionality for edge/level/simple/percpu interrupts. This is not
 	only a functional optimization. It also shortens code paths for
 	interrupts.
       </para>
-      <para>
-	To make use of the split implementation, replace the call to
-	__do_IRQ by a call to desc->handle_irq() and associate
-        the appropriate handler function to desc->handle_irq().
-	In most cases the generic handler implementations should
-	be sufficient.
-     </para>
   </chapter>
 
   <chapter id="locking">
      <title>Locking on SMP</title>
      <para>
 	The locking of chip registers is up to the architecture that
-	defines the chip primitives. There is a chip->lock field that can be used
-	for serialization, but the generic layer does not touch it. The per-irq
-	structure is protected via desc->lock, by the generic layer.
+	defines the chip primitives. The per-irq structure is
+	protected via desc->lock, by the generic layer.
      </para>
   </chapter>
   <chapter id="structs">
diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl
index 5d259c632cdf..fea63b45471a 100644
--- a/Documentation/DocBook/media-entities.tmpl
+++ b/Documentation/DocBook/media-entities.tmpl
@@ -294,6 +294,7 @@
 <!ENTITY sub-srggb10 SYSTEM "v4l/pixfmt-srggb10.xml">
 <!ENTITY sub-srggb8 SYSTEM "v4l/pixfmt-srggb8.xml">
 <!ENTITY sub-y10 SYSTEM "v4l/pixfmt-y10.xml">
+<!ENTITY sub-y12 SYSTEM "v4l/pixfmt-y12.xml">
 <!ENTITY sub-pixfmt SYSTEM "v4l/pixfmt.xml">
 <!ENTITY sub-cropcap SYSTEM "v4l/vidioc-cropcap.xml">
 <!ENTITY sub-dbg-g-register SYSTEM "v4l/vidioc-dbg-g-register.xml">
diff --git a/Documentation/DocBook/v4l/media-ioc-setup-link.xml b/Documentation/DocBook/v4l/media-ioc-setup-link.xml
index 2331e76ded17..cec97af4dab4 100644
--- a/Documentation/DocBook/v4l/media-ioc-setup-link.xml
+++ b/Documentation/DocBook/v4l/media-ioc-setup-link.xml
@@ -34,7 +34,7 @@
       <varlistentry>
 	<term><parameter>request</parameter></term>
 	<listitem>
-	  <para>MEDIA_IOC_ENUM_LINKS</para>
+	  <para>MEDIA_IOC_SETUP_LINK</para>
 	</listitem>
       </varlistentry>
       <varlistentry>
diff --git a/Documentation/DocBook/v4l/pixfmt-y12.xml b/Documentation/DocBook/v4l/pixfmt-y12.xml
new file mode 100644
index 000000000000..ff417b858cc9
--- /dev/null
+++ b/Documentation/DocBook/v4l/pixfmt-y12.xml
@@ -0,0 +1,79 @@
+<refentry id="V4L2-PIX-FMT-Y12">
+  <refmeta>
+    <refentrytitle>V4L2_PIX_FMT_Y12 ('Y12 ')</refentrytitle>
+    &manvol;
+  </refmeta>
+  <refnamediv>
+    <refname><constant>V4L2_PIX_FMT_Y12</constant></refname>
+    <refpurpose>Grey-scale image</refpurpose>
+  </refnamediv>
+  <refsect1>
+    <title>Description</title>
+
+    <para>This is a grey-scale image with a depth of 12 bits per pixel. Pixels
+are stored in 16-bit words with unused high bits padded with 0. The least
+significant byte is stored at lower memory addresses (little-endian).</para>
+
+    <example>
+      <title><constant>V4L2_PIX_FMT_Y12</constant> 4 &times; 4
+pixel image</title>
+
+      <formalpara>
+	<title>Byte Order.</title>
+	<para>Each cell is one byte.
+	  <informaltable frame="none">
+	    <tgroup cols="9" align="center">
+	      <colspec align="left" colwidth="2*" />
+	      <tbody valign="top">
+		<row>
+		  <entry>start&nbsp;+&nbsp;0:</entry>
+		  <entry>Y'<subscript>00low</subscript></entry>
+		  <entry>Y'<subscript>00high</subscript></entry>
+		  <entry>Y'<subscript>01low</subscript></entry>
+		  <entry>Y'<subscript>01high</subscript></entry>
+		  <entry>Y'<subscript>02low</subscript></entry>
+		  <entry>Y'<subscript>02high</subscript></entry>
+		  <entry>Y'<subscript>03low</subscript></entry>
+		  <entry>Y'<subscript>03high</subscript></entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;8:</entry>
+		  <entry>Y'<subscript>10low</subscript></entry>
+		  <entry>Y'<subscript>10high</subscript></entry>
+		  <entry>Y'<subscript>11low</subscript></entry>
+		  <entry>Y'<subscript>11high</subscript></entry>
+		  <entry>Y'<subscript>12low</subscript></entry>
+		  <entry>Y'<subscript>12high</subscript></entry>
+		  <entry>Y'<subscript>13low</subscript></entry>
+		  <entry>Y'<subscript>13high</subscript></entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;16:</entry>
+		  <entry>Y'<subscript>20low</subscript></entry>
+		  <entry>Y'<subscript>20high</subscript></entry>
+		  <entry>Y'<subscript>21low</subscript></entry>
+		  <entry>Y'<subscript>21high</subscript></entry>
+		  <entry>Y'<subscript>22low</subscript></entry>
+		  <entry>Y'<subscript>22high</subscript></entry>
+		  <entry>Y'<subscript>23low</subscript></entry>
+		  <entry>Y'<subscript>23high</subscript></entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;24:</entry>
+		  <entry>Y'<subscript>30low</subscript></entry>
+		  <entry>Y'<subscript>30high</subscript></entry>
+		  <entry>Y'<subscript>31low</subscript></entry>
+		  <entry>Y'<subscript>31high</subscript></entry>
+		  <entry>Y'<subscript>32low</subscript></entry>
+		  <entry>Y'<subscript>32high</subscript></entry>
+		  <entry>Y'<subscript>33low</subscript></entry>
+		  <entry>Y'<subscript>33high</subscript></entry>
+		</row>
+	      </tbody>
+	    </tgroup>
+	  </informaltable>
+	</para>
+      </formalpara>
+    </example>
+  </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml
index c6fdcbbd1b41..40af4beb48b9 100644
--- a/Documentation/DocBook/v4l/pixfmt.xml
+++ b/Documentation/DocBook/v4l/pixfmt.xml
@@ -696,6 +696,7 @@ information.</para>
     &sub-packed-yuv;
     &sub-grey;
     &sub-y10;
+    &sub-y12;
     &sub-y16;
     &sub-yuyv;
     &sub-uyvy;
diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml
index 7041127d6dfc..d7ccd25edcc1 100644
--- a/Documentation/DocBook/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/v4l/subdev-formats.xml
@@ -456,6 +456,23 @@
 	      <entry>b<subscript>1</subscript></entry>
 	      <entry>b<subscript>0</subscript></entry>
 	    </row>
+	    <row id="V4L2-MBUS-FMT-SGBRG8-1X8">
+	      <entry>V4L2_MBUS_FMT_SGBRG8_1X8</entry>
+	      <entry>0x3013</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	    </row>
 	    <row id="V4L2-MBUS-FMT-SGRBG8-1X8">
 	      <entry>V4L2_MBUS_FMT_SGRBG8_1X8</entry>
 	      <entry>0x3002</entry>
@@ -473,6 +490,23 @@
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
 	    </row>
+	    <row id="V4L2-MBUS-FMT-SRGGB8-1X8">
+	      <entry>V4L2_MBUS_FMT_SRGGB8_1X8</entry>
+	      <entry>0x3014</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	    </row>
 	    <row id="V4L2-MBUS-FMT-SBGGR10-DPCM8-1X8">
 	      <entry>V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8</entry>
 	      <entry>0x300b</entry>
@@ -2159,6 +2193,31 @@
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
+	    <row id="V4L2-MBUS-FMT-Y12-1X12">
+	      <entry>V4L2_MBUS_FMT_Y12_1X12</entry>
+	      <entry>0x2013</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	    </row>
 	    <row id="V4L2-MBUS-FMT-UYVY8-1X16">
 	      <entry>V4L2_MBUS_FMT_UYVY8_1X16</entry>
 	      <entry>0x200f</entry>
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 71b6f500ddb9..1d7a885761f5 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -21,7 +21,7 @@ rcu.txt
 RTFP.txt
 	- List of RCU papers (bibliography) going back to 1980.
 stallwarn.txt
-	- RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR)
+	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
 trace.txt
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 862c08ef1fde..4e959208f736 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -1,22 +1,25 @@
 Using RCU's CPU Stall Detector
 
-The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables
-RCU's CPU stall detector, which detects conditions that unduly delay
-RCU grace periods.  The stall detector's idea of what constitutes
-"unduly delayed" is controlled by a set of C preprocessor macros:
+The rcu_cpu_stall_suppress module parameter controls RCU's CPU stall
+detector, which detects conditions that unduly delay RCU grace periods.
+This module parameter enables CPU stall detection by default, but
+may be overridden via boot-time parameter or at runtime via sysfs.
+The stall detector's idea of what constitutes "unduly delayed" is
+controlled by a set of kernel configuration variables and cpp macros:
 
-RCU_SECONDS_TILL_STALL_CHECK
+CONFIG_RCU_CPU_STALL_TIMEOUT
 
-	This macro defines the period of time that RCU will wait from
-	the beginning of a grace period until it issues an RCU CPU
-	stall warning.	This time period is normally ten seconds.
+	This kernel configuration parameter defines the period of time
+	that RCU will wait from the beginning of a grace period until it
+	issues an RCU CPU stall warning.  This time period is normally
+	ten seconds.
 
 RCU_SECONDS_TILL_STALL_RECHECK
 
 	This macro defines the period of time that RCU will wait after
 	issuing a stall warning until it issues another stall warning
-	for the same stall.  This time period is normally set to thirty
-	seconds.
+	for the same stall.  This time period is normally set to three
+	times the check interval plus thirty seconds.
 
 RCU_STALL_RAT_DELAY
 
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 6a8c73f55b80..c078ad48f7a1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -10,34 +10,46 @@ for rcutree and next for rcutiny.
 
 CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-These implementations of RCU provides five debugfs files under the
-top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
-rcu/rcudata), rcu/rcugp (which displays grace-period counters),
-rcu/rcuhier (which displays the struct rcu_node hierarchy), and
-rcu/rcu_pending (which displays counts of the reasons that the
-rcu_pending() function decided that there was core RCU work to do).
+These implementations of RCU provide several debugfs files under the
+top-level directory "rcu":
+
+rcu/rcudata:
+	Displays fields in struct rcu_data.
+rcu/rcudata.csv:
+	Comma-separated values spreadsheet version of rcudata.
+rcu/rcugp:
+	Displays grace-period counters.
+rcu/rcuhier:
+	Displays the struct rcu_node hierarchy.
+rcu/rcu_pending:
+	Displays counts of the reasons rcu_pending() decided that RCU had
+	work to do.
+rcu/rcutorture:
+	Displays rcutorture test progress.
+rcu/rcuboost:
+	Displays RCU boosting statistics.  Only present if
+	CONFIG_RCU_BOOST=y.
 
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu_sched:
-  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
-  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
-  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
-  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
-  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
-  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
-  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
-  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
+  0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
+  1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
+  2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
+  3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
+  4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
+  5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
+  6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
+  7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
 rcu_bh:
-  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
-  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
-  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
+  1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
+  2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
+  3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
+  4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
+  5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
+  6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
+  7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
 
 The first section lists the rcu_data structures for rcu_sched, the second
 for rcu_bh.  Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -52,17 +64,18 @@ o	The number at the beginning of each line is the CPU number.
 	substantially larger than the number of actual CPUs.
 
 o	"c" is the count of grace periods that this CPU believes have
-	completed.  CPUs in dynticks idle mode may lag quite a ways
-	behind, for example, CPU 4 under "rcu_sched" above, which has
-	slept through the past 25 RCU grace periods.  It is not unusual
-	to see CPUs lagging by thousands of grace periods.
+	completed.  Offlined CPUs and CPUs in dynticks idle mode may
+	lag quite a ways behind, for example, CPU 6 under "rcu_sched"
+	above, which has been offline through not quite 40,000 RCU grace
+	periods.  It is not unusual to see CPUs lagging by thousands of
+	grace periods.
 
 o	"g" is the count of grace periods that this CPU believes have
-	started.  Again, CPUs in dynticks idle mode may lag behind.
-	If the "c" and "g" values are equal, this CPU has already
-	reported a quiescent state for the last RCU grace period that
-	it is aware of, otherwise, the CPU believes that it owes RCU a
-	quiescent state.
+	started.  Again, offlined CPUs and CPUs in dynticks idle mode
+	may lag behind.  If the "c" and "g" values are equal, this CPU
+	has already reported a quiescent state for the last RCU grace
+	period that it is aware of, otherwise, the CPU believes that it
+	owes RCU a quiescent state.
 
 o	"pq" indicates that this CPU has passed through a quiescent state
 	for the current grace period.  It is possible for "pq" to be
@@ -81,7 +94,8 @@ o	"pqc" indicates which grace period the last-observed quiescent
 	the next grace period!
 
 o	"qp" indicates that RCU still expects a quiescent state from
-	this CPU.
+	this CPU.  Offlined CPUs and CPUs in dynticks idle mode might
+	well have qp=1, which is OK: RCU is still ignoring them.
 
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
@@ -108,7 +122,7 @@ o	"df" is the number of times that some other CPU has forced a
 
 o	"of" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being
-	offline.  In a perfect world, this might neve happen, but it
+	offline.  In a perfect world, this might never happen, but it
 	turns out that offlining and onlining a CPU can take several grace
 	periods, and so there is likely to be an extended period of time
 	when RCU believes that the CPU is online when it really is not.
@@ -125,6 +139,62 @@ o	"ql" is the number of RCU callbacks currently residing on
 	of what state they are in (new, waiting for grace period to
 	start, waiting for grace period to end, ready to invoke).
 
+o	"qs" gives an indication of the state of the callback queue
+	with four characters:
+
+	"N"	Indicates that there are callbacks queued that are not
+		ready to be handled by the next grace period, and thus
+		will be handled by the grace period following the next
+		one.
+
+	"R"	Indicates that there are callbacks queued that are
+		ready to be handled by the next grace period.
+
+	"W"	Indicates that there are callbacks queued that are
+		waiting on the current grace period.
+
+	"D"	Indicates that there are callbacks queued that have
+		already been handled by a prior grace period, and are
+		thus waiting to be invoked.  Note that callbacks in
+		the process of being invoked are not counted here.
+		Callbacks in the process of being invoked are those
+		that have been removed from the rcu_data structure's
+		queues by rcu_do_batch(), but which have not yet been
+		invoked.
+
+	If there are no callbacks in a given one of the above states,
+	the corresponding character is replaced by ".".
+
+o	"kt" is the per-CPU kernel-thread state.  The digit preceding
+	the first slash is zero if there is no work pending and 1
+	otherwise.  The character between the first pair of slashes is
+	as follows:
+
+	"S"	The kernel thread is stopped, in other words, all
+		CPUs corresponding to this rcu_node structure are
+		offline.
+
+	"R"	The kernel thread is running.
+
+	"W"	The kernel thread is waiting because there is no work
+		for it to do.
+
+	"O"	The kernel thread is waiting because it has been
+		forced off of its designated CPU or because its
+		->cpus_allowed mask permits it to run on other than
+		its designated CPU.
+
+	"Y"	The kernel thread is yielding to avoid hogging CPU.
+
+	"?"	Unknown value, indicates a bug.
+
+	The number after the final slash is the CPU that the kthread
+	is actually running on.
+
+o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
+	the number of times that this CPU's per-CPU kthread has gone
+	through its loop servicing invoke_rcu_cpu_kthread() requests.
+
 o	"b" is the batch limit for this CPU.  If more than this number
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
@@ -174,14 +244,14 @@ o	"gpnum" is the number of grace periods that have started.  It is
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
 c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
-1/1 .>. 0:127 ^0    
-3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
-3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3    
+1/1 ..>. 0:127 ^0
+3/3 ..>. 0:35 ^0    0/0 ..>. 36:71 ^1    0/0 ..>. 72:107 ^2    0/0 ..>. 108:127 ^3
+3/3f ..>. 0:5 ^0    2/3 ..>. 6:11 ^1    0/0 ..>. 12:17 ^2    0/0 ..>. 18:23 ^3    0/0 ..>. 24:29 ^4    0/0 ..>. 30:35 ^5    0/0 ..>. 36:41 ^0    0/0 ..>. 42:47 ^1    0/0 ..>. 48:53 ^2    0/0 ..>. 54:59 ^3    0/0 ..>. 60:65 ^4    0/0 ..>. 66:71 ^5    0/0 ..>. 72:77 ^0    0/0 ..>. 78:83 ^1    0/0 ..>. 84:89 ^2    0/0 ..>. 90:95 ^3    0/0 ..>. 96:101 ^4    0/0 ..>. 102:107 ^5    0/0 ..>. 108:113 ^0    0/0 ..>. 114:119 ^1    0/0 ..>. 120:125 ^2    0/0 ..>. 126:127 ^3
 rcu_bh:
 c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
-0/1 .>. 0:127 ^0    
-0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
-0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
+0/1 ..>. 0:127 ^0
+0/3 ..>. 0:35 ^0    0/0 ..>. 36:71 ^1    0/0 ..>. 72:107 ^2    0/0 ..>. 108:127 ^3
+0/3f ..>. 0:5 ^0    0/3 ..>. 6:11 ^1    0/0 ..>. 12:17 ^2    0/0 ..>. 18:23 ^3    0/0 ..>. 24:29 ^4    0/0 ..>. 30:35 ^5    0/0 ..>. 36:41 ^0    0/0 ..>. 42:47 ^1    0/0 ..>. 48:53 ^2    0/0 ..>. 54:59 ^3    0/0 ..>. 60:65 ^4    0/0 ..>. 66:71 ^5    0/0 ..>. 72:77 ^0    0/0 ..>. 78:83 ^1    0/0 ..>. 84:89 ^2    0/0 ..>. 90:95 ^3    0/0 ..>. 96:101 ^4    0/0 ..>. 102:107 ^5    0/0 ..>. 108:113 ^0    0/0 ..>. 114:119 ^1    0/0 ..>. 120:125 ^2    0/0 ..>. 126:127 ^3
 
 This is once again split into "rcu_sched" and "rcu_bh" portions,
 and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
@@ -240,13 +310,20 @@ o	Each element of the form "1/1 0:127 ^0" represents one struct
 		current grace period.
 
 	o	The characters separated by the ">" indicate the state
-		of the blocked-tasks lists.  A "T" preceding the ">"
+		of the blocked-tasks lists.  A "G" preceding the ">"
 		indicates that at least one task blocked in an RCU
 		read-side critical section blocks the current grace
-		period, while a "." preceding the ">" indicates otherwise.
-		The character following the ">" indicates similarly for
-		the next grace period.  A "T" should appear in this
-		field only for rcu-preempt.
+		period, while an "E" preceding the ">" indicates that
+		at least one task blocked in an RCU read-side critical
+		section blocks the current expedited grace period.
+		A "T" character following the ">" indicates that at
+		least one task is blocked within an RCU read-side
+		critical section, regardless of whether any current
+		grace period (expedited or normal) is inconvenienced.
+		A "." character appears if the corresponding condition
+		does not hold, so that "..>." indicates that no tasks
+		are blocked.  In contrast, "GE>T" indicates maximal
+		inconvenience from blocked tasks.
 
 	o	The numbers separated by the ":" are the range of CPUs
 		served by this struct rcu_node.  This can be helpful
@@ -328,6 +405,113 @@ o	"nn" is the number of times that this CPU needed nothing.  Alert
 	is due to short-circuit evaluation in rcu_pending().
 
 
+The output of "cat rcu/rcutorture" looks as follows:
+
+rcutorture test sequence: 0 (test in progress)
+rcutorture update version number: 615
+
+The first line shows the number of rcutorture tests that have completed
+since boot.  If a test is currently running, the "(test in progress)"
+string will appear as shown above.  The second line shows the number of
+update cycles that the current test has started, or zero if there is
+no test in progress.
+
+
+The output of "cat rcu/rcuboost" looks as follows:
+
+0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
+     balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16
+6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
+     balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6
+
+This information is output only for rcu_preempt.  Each two-line entry
+corresponds to a leaf rcu_node structure.  The fields are as follows:
+
+o	"n:m" is the CPU-number range for the corresponding two-line
+	entry.  In the sample output above, the first entry covers
+	CPUs zero through five and the second entry covers CPUs 6
+	and 7.
+
+o	"tasks=TNEB" gives the state of the various segments of the
+	rnp->blocked_tasks list:
+
+	"T"	This indicates that there are some tasks that blocked
+		while running on one of the corresponding CPUs while
+		in an RCU read-side critical section.
+
+	"N"	This indicates that some of the blocked tasks are preventing
+		the current normal (non-expedited) grace period from
+		completing.
+
+	"E"	This indicates that some of the blocked tasks are preventing
+		the current expedited grace period from completing.
+
+	"B"	This indicates that some of the blocked tasks are in
+		need of RCU priority boosting.
+
+	Each character is replaced with "." if the corresponding
+	condition does not hold.
+
+o	"kt" is the state of the RCU priority-boosting kernel
+	thread associated with the corresponding rcu_node structure.
+	The state can be one of the following:
+
+	"S"	The kernel thread is stopped, in other words, all
+		CPUs corresponding to this rcu_node structure are
+		offline.
+
+	"R"	The kernel thread is running.
+
+	"W"	The kernel thread is waiting because there is no work
+		for it to do.
+
+	"Y"	The kernel thread is yielding to avoid hogging CPU.
+
+	"?"	Unknown value, indicates a bug.
+
+o	"ntb" is the number of tasks boosted.
+
+o	"neb" is the number of tasks boosted in order to complete an
+	expedited grace period.
+
+o	"nnb" is the number of tasks boosted in order to complete a
+	normal (non-expedited) grace period.  When boosting a task
+	that was blocking both an expedited and a normal grace period,
+	it is counted against the expedited total above.
+
+o	"j" is the low-order 16 bits of the jiffies counter in
+	hexadecimal.
+
+o	"bt" is the low-order 16 bits of the value that the jiffies
+	counter will have when we next start boosting, assuming that
+	the current grace period does not end beforehand.  This is
+	also in hexadecimal.
+
+o	"balk: nt" counts the number of times we didn't boost (in
+	other words, we balked) even though it was time to boost because
+	there were no blocked tasks to boost.  This situation occurs
+	when there is one blocked task on one rcu_node structure and
+	none on some other rcu_node structure.
+
+o	"egt" counts the number of times we balked because although
+	there were blocked tasks, none of them were blocking the
+	current grace period, whether expedited or otherwise.
+
+o	"bt" counts the number of times we balked because boosting
+	had already been initiated for the current grace period.
+
+o	"nb" counts the number of times we balked because there
+	was at least one task blocking the current non-expedited grace
+	period that never had blocked.  If it is already running, it
+	just won't help to boost its priority!
+
+o	"ny" counts the number of times we balked because it was
+	not yet time to start boosting.
+
+o	"nos" counts the number of times we balked for other
+	reasons, e.g., the grace period ended first.
+
+
 CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
 
 These implementations of RCU provides a single debugfs file under the
@@ -394,9 +578,9 @@ o	"neb" is the number of expedited grace periods that have had
 o	"nnb" is the number of normal grace periods that have had
 	to resort to RCU priority boosting since boot.
 
-o	"j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+o	"j" is the low-order 16 bits of the jiffies counter in hexadecimal.
 
-o	"bt" is the low-order 12 bits of the value that the jiffies counter
+o	"bt" is the low-order 16 bits of the value that the jiffies counter
 	will have at the next time that boosting is scheduled to begin.
 
 o	In the line beginning with "normal balk", the fields are as follows:
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index b6ed61c95856..7c163477fcd8 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -52,8 +52,10 @@ Brief summary of control files.
  tasks				 # attach a task(thread) and show list of threads
  cgroup.procs			 # show list of processes
  cgroup.event_control		 # an interface for event_fd()
- memory.usage_in_bytes		 # show current memory(RSS+Cache) usage.
- memory.memsw.usage_in_bytes	 # show current memory+Swap usage
+ memory.usage_in_bytes		 # show current res_counter usage for memory
+				 (See 5.5 for details)
+ memory.memsw.usage_in_bytes	 # show current res_counter usage for memory+Swap
+				 (See 5.5 for details)
  memory.limit_in_bytes		 # set/show limit of memory usage
  memory.memsw.limit_in_bytes	 # set/show limit of memory+Swap usage
  memory.failcnt			 # show the number of memory usage hits limits
@@ -453,6 +455,15 @@ memory under it will be reclaimed.
 You can reset failcnt by writing 0 to failcnt file.
 # echo 0 > .../memory.failcnt
 
+5.5 usage_in_bytes
+
+For efficiency, like other kernel components, the memory cgroup uses some
+optimization to avoid unnecessary cacheline false sharing. usage_in_bytes is
+affected by this method and doesn't show the 'exact' value of memory (and swap)
+usage; it's a fuzz value for efficient access. (Of course, when necessary, it's
+synchronized.) If you want to know the more exact memory usage, you should use
+the RSS+CACHE(+SWAP) value in memory.stat (see 5.2).
+
 6. Hierarchy support
 
 The memory controller supports a deep hierarchy and hierarchical accounting.
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
new file mode 100755
index 000000000000..1a729f089866
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
@@ -0,0 +1,61 @@
+CAN Device Tree Bindings
+------------------------
+2011 Freescale Semiconductor, Inc.
+
+fsl,flexcan-v1.0 nodes
+-----------------------
+In addition to the required compatible-, reg- and interrupt-properties, you can
+also specify which clock source shall be used for the controller.
+
+CPI Clock - CAN Protocol Interface Clock
+	This CLK_SRC bit of CTRL(control register) selects the clock source to
+	the CAN Protocol Interface(CPI) to be either the peripheral clock
+	(driven by the PLL) or the crystal oscillator clock. The selected clock
+	is the one fed to the prescaler to generate the Serial Clock (Sclock).
+	The PRESDIV field of CTRL(control register) controls a prescaler that
+	generates the Serial Clock (Sclock), whose period defines the
+	time quantum used to compose the CAN waveform.
+
+CAN Engine Clock Source
+	There are two sources for CAN clock
+	- Platform Clock  It represents the bus clock
+	- Oscillator Clock
+
+	Peripheral Clock (PLL)
+	--------------
+		     |
+		    ---------		      -------------
+		    |       |CPI Clock	      | Prescaler |       Sclock
+		    |       |---------------->| (1.. 256) |------------>
+		    ---------		      -------------
+                     |  |
+	--------------  ---------------------CLK_SRC
+	Oscillator Clock
+
+- fsl,flexcan-clock-source : CAN Engine Clock Source. This property selects
+			     the peripheral clock. PLL clock is fed to the
+			     prescaler to generate the Serial Clock (Sclock).
+			     Valid values are "oscillator" and "platform".
+			     "oscillator": CAN engine clock source is oscillator clock.
+			     "platform": The CAN engine clock source is the bus clock
+		             (platform clock).
+
+- fsl,flexcan-clock-divider : for the reference and system clock, an additional
+			      clock divider can be specified.
+- clock-frequency: frequency required to calculate the bitrate for FlexCAN.
+
+Note:
+	- v1.0 of flexcan-v1.0 represents the IP block version for P1010 SOC.
+	- P1010 does not have oscillator as the Clock Source. So the default
+	  Clock Source is platform clock.
+Examples:
+
+	can0@1c000 {
+		compatible = "fsl,flexcan-v1.0";
+		reg = <0x1c000 0x1000>;
+		interrupts = <48 0x2>;
+		interrupt-parent = <&mpic>;
+		fsl,flexcan-clock-source = "platform";
+		fsl,flexcan-clock-divider = <2>;
+		clock-frequency = <fixed by u-boot>;
+	};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt
new file mode 100644
index 000000000000..939a26d541f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt
@@ -0,0 +1,76 @@
+Integrated Flash Controller
+
+Properties:
+- name : Should be ifc
+- compatible : should contain "fsl,ifc". The version of the integrated
+               flash controller can be found in the IFC_REV register at
+               offset zero.
+
+- #address-cells : Should be either two or three.  The first cell is the
+                   chipselect number, and the remaining cells are the
+                   offset into the chipselect.
+- #size-cells : Either one or two, depending on how large each chipselect
+                can be.
+- reg : Offset and length of the register set for the device
+- interrupts : IFC has two interrupts. The first one is the "common"
+               interrupt (CM_EVTER_STAT), and the second is the NAND interrupt
+               (NAND_EVTER_STAT).
+
+- ranges : Each range corresponds to a single chipselect, and covers
+           the entire access window as configured.
+
+Child device nodes describe the devices connected to IFC such as NOR (e.g.
+cfi-flash) and NAND (fsl,ifc-nand). There might be board specific devices
+like FPGAs, CPLDs, etc.
+
+Example:
+
+	ifc@ffe1e000 {
+		compatible = "fsl,ifc", "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0x0 0xffe1e000 0 0x2000>;
+		interrupts = <16 2 19 2>;
+
+		/* NOR, NAND Flashes and CPLD on board */
+		ranges = <0x0 0x0 0x0 0xee000000 0x02000000
+			  0x1 0x0 0x0 0xffa00000 0x00010000
+			  0x3 0x0 0x0 0xffb00000 0x00020000>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x2000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				/* 32MB for user data */
+				reg = <0x0 0x02000000>;
+				label = "NOR Data";
+			};
+		};
+
+		flash@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x1 0x0 0x10000>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND U-Boot Image";
+				read-only;
+			};
+		};
+
+		cpld@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p1010rdb-cpld";
+			reg = <0x3 0x0 0x000001f>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt
new file mode 100644
index 000000000000..df41958140e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt
@@ -0,0 +1,38 @@
+* Freescale MPIC timers
+
+Required properties:
+- compatible: "fsl,mpic-global-timer"
+
+- reg : Contains two regions.  The first is the main timer register bank
+  (GTCCRxx, GTBCRxx, GTVPRxx, GTDRxx).  The second is the timer control
+  register (TCRx) for the group.
+
+- fsl,available-ranges: use <start count> style section to define which
+  timer interrupts can be used.  This property is optional; without this,
+  all timers within the group can be used.
+
+- interrupts: one interrupt per timer in the group, in order, starting
+  with timer zero.  If fsl,available-ranges is present, only the
+  interrupts that correspond to available timers shall be present.
+
+Example:
+	/* Note that this requires #interrupt-cells to be 4 */
+	timer0: timer@41100 {
+		compatible = "fsl,mpic-global-timer";
+		reg = <0x41100 0x100 0x41300 4>;
+
+		/* Another AMP partition is using timers 0 and 1 */
+		fsl,available-ranges = <2 2>;
+
+		interrupts = <2 0 3 0
+		              3 0 3 0>;
+	};
+
+	timer1: timer@42100 {
+		compatible = "fsl,mpic-global-timer";
+		reg = <0x42100 0x100 0x42300 4>;
+		interrupts = <4 0 3 0
+		              5 0 3 0
+		              6 0 3 0
+		              7 0 3 0>;
+	};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
index 4f6145859aab..2cf38bd841fd 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
@@ -190,7 +190,7 @@ EXAMPLE 4
 	 */
 	timer0: timer@41100 {
 		compatible = "fsl,mpic-global-timer";
-		reg = <0x41100 0x100>;
+		reg = <0x41100 0x100 0x41300 4>;
 		interrupts = <0 0 3 0
 		              1 0 3 0
 		              2 0 3 0
diff --git a/Documentation/driver-model/bus.txt b/Documentation/driver-model/bus.txt
index 5001b7511626..6754b2df8aa1 100644
--- a/Documentation/driver-model/bus.txt
+++ b/Documentation/driver-model/bus.txt
@@ -3,24 +3,7 @@ Bus Types
 
 Definition
 ~~~~~~~~~~
-
-struct bus_type {
-	char			* name;
-
-	struct subsystem	subsys;
-	struct kset		drivers;
-	struct kset		devices;
-
-	struct bus_attribute	* bus_attrs;
-	struct device_attribute	* dev_attrs;
-	struct driver_attribute	* drv_attrs;
-
-	int		(*match)(struct device * dev, struct device_driver * drv);
-	int		(*hotplug) (struct device *dev, char **envp, 
-				    int num_envp, char *buffer, int buffer_size);
-	int		(*suspend)(struct device * dev, pm_message_t state);
-	int		(*resume)(struct device * dev);
-};
+See the kerneldoc for the struct bus_type.
 
 int bus_register(struct bus_type * bus);
 
diff --git a/Documentation/driver-model/class.txt b/Documentation/driver-model/class.txt
index 548505f14aa4..1fefc480a80b 100644
--- a/Documentation/driver-model/class.txt
+++ b/Documentation/driver-model/class.txt
@@ -27,22 +27,7 @@ The device class structure looks like:
 typedef int (*devclass_add)(struct device *);
 typedef void (*devclass_remove)(struct device *);
 
-struct device_class {
-	char			* name;
-	rwlock_t		lock;
-	u32			devnum;
-	struct list_head	node;
-
-	struct list_head	drivers;
-	struct list_head	intf_list;
-
-	struct driver_dir_entry	dir;
-	struct driver_dir_entry	device_dir;
-	struct driver_dir_entry	driver_dir;
-
-	devclass_add		add_device;
-	devclass_remove		remove_device;
-};
+See the kerneldoc for the struct class.
 
 A typical device class definition would look like: 
 
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt
index a124f3126b0d..b2ff42685bcb 100644
--- a/Documentation/driver-model/device.txt
+++ b/Documentation/driver-model/device.txt
@@ -2,96 +2,7 @@
 The Basic Device Structure
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-struct device {
-        struct list_head g_list;
-        struct list_head node;
-        struct list_head bus_list;
-        struct list_head driver_list;
-        struct list_head intf_list;
-        struct list_head children;
-        struct device   * parent;
-
-        char    name[DEVICE_NAME_SIZE];
-        char    bus_id[BUS_ID_SIZE];
-
-        spinlock_t      lock;
-        atomic_t        refcount;
-
-        struct bus_type * bus;
-        struct driver_dir_entry dir;
-
-	u32		class_num;
-
-        struct device_driver *driver;
-        void            *driver_data;
-        void            *platform_data;
-
-        u32             current_state;
-        unsigned char *saved_state;
-
-        void    (*release)(struct device * dev);
-};
-
-Fields 
-~~~~~~
-g_list:	Node in the global device list.
-
-node:	Node in device's parent's children list.
-
-bus_list: Node in device's bus's devices list.
-
-driver_list:   Node in device's driver's devices list.
-
-intf_list:     List of intf_data. There is one structure allocated for
-	       each interface that the device supports.
-
-children:      List of child devices.
-
-parent:        *** FIXME ***
-
-name:	       ASCII description of device. 
-	       Example: " 3Com Corporation 3c905 100BaseTX [Boomerang]"
-
-bus_id:	       ASCII representation of device's bus position. This 
-	       field should be a name unique across all devices on the
-	       bus type the device belongs to. 
-
-	       Example: PCI bus_ids are in the form of
-	       <bus number>:<slot number>.<function number> 
-	       This name is unique across all PCI devices in the system.
-
-lock:	       Spinlock for the device. 
-
-refcount:      Reference count on the device.
-
-bus:	       Pointer to struct bus_type that device belongs to.
-
-dir:	       Device's sysfs directory.
-
-class_num:     Class-enumerated value of the device.
-
-driver:	       Pointer to struct device_driver that controls the device.
-
-driver_data:   Driver-specific data.
-
-platform_data: Platform data specific to the device.
-
-	       Example:  for devices on custom boards, as typical of embedded
-	       and SOC based hardware, Linux often uses platform_data to point
-	       to board-specific structures describing devices and how they
-	       are wired.  That can include what ports are available, chip
-	       variants, which GPIO pins act in what additional roles, and so
-	       on.  This shrinks the "Board Support Packages" (BSPs) and
-	       minimizes board-specific #ifdefs in drivers.
-
-current_state: Current power state of the device.
-
-saved_state:   Pointer to saved state of the device. This is usable by
-	       the device driver controlling the device.
-
-release:       Callback to free the device after all references have 
-	       gone away. This should be set by the allocator of the 
-	       device (i.e. the bus driver that discovered the device).
+See the kerneldoc for the struct device.
 
 
 Programming Interface
diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt
index d2cd6fb8ba9e..4421135826a2 100644
--- a/Documentation/driver-model/driver.txt
+++ b/Documentation/driver-model/driver.txt
@@ -1,23 +1,7 @@
 
 Device Drivers
 
-struct device_driver {
-        char                    * name;
-        struct bus_type         * bus;
-
-        struct completion	unloaded;
-        struct kobject		kobj;
-        list_t                  devices;
-
-        struct module		*owner;
-
-        int     (*probe)        (struct device * dev);
-        int     (*remove)       (struct device * dev);
-
-        int     (*suspend)      (struct device * dev, pm_message_t state);
-        int     (*resume)       (struct device * dev);
-};
-
+See the kerneldoc for the struct device_driver.
 
 
 Allocation
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 46679e48967a..4cba260e3059 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -376,26 +376,6 @@ Who:	Tejun Heo <tj@kernel.org>
 
 ----------------------------
 
-What:	Support for lcd_switch and display_get in asus-laptop driver
-When:	March 2010
-Why:	These two features use non-standard interfaces. There are the
-	only features that really need multiple path to guess what's
-	the right method name on a specific laptop.
-
-	Removing them will allow to remove a lot of code an significantly
-	clean the drivers.
-
-	This will affect the backlight code which won't be able to know
-	if the backlight is on or off. The platform display file will also be
-	write only (like the one in eeepc-laptop).
-
-	This should'nt affect a lot of user because they usually know
-	when their display is on or off.
-
-Who:	Corentin Chary <corentin.chary@gmail.com>
-
-----------------------------
-
 What:	sysfs-class-rfkill state file
 When:	Feb 2014
 Files:	net/rfkill/core.c
@@ -459,14 +439,6 @@ Who:	Thomas Gleixner <tglx@linutronix.de>
 
 ----------------------------
 
-What:	The acpi_sleep=s4_nonvs command line option
-When:	2.6.37
-Files:	arch/x86/kernel/acpi/sleep.c
-Why:	superseded by acpi_sleep=nonvs
-Who:	Rafael J. Wysocki <rjw@sisk.pl>
-
-----------------------------
-
 What: 	PCI DMA unmap state API
 When:	August 2012
 Why:	PCI DMA unmap state API (include/linux/pci-dma.h) was replaced
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index b0b814d75ca1..60740e8ecb37 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -836,7 +836,6 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
  TASKLET:          0          0          0        290
    SCHED:      27035      26983      26971      26746
  HRTIMER:          0          0          0          0
-     RCU:       1678       1769       2178       2250
 
 
 1.3 IDE devices in /proc/ide
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt
index cb8a3a00cc92..df904aec9904 100644
--- a/Documentation/flexible-arrays.txt
+++ b/Documentation/flexible-arrays.txt
@@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before
 entering atomic context, using:
 
     int flex_array_prealloc(struct flex_array *array, unsigned int start,
-			    unsigned int end, gfp_t flags);
+			    unsigned int nr_elements, gfp_t flags);
 
 This function will ensure that memory for the elements indexed in the range
-defined by start and end has been allocated.  Thereafter, a
+defined by start and nr_elements has been allocated.  Thereafter, a
 flex_array_put() call on an element in that range is guaranteed not to
 block.
 
diff --git a/Documentation/hwmon/adm1021 b/Documentation/hwmon/adm1021
index 03d02bfb3df1..02ad96cf9b2b 100644
--- a/Documentation/hwmon/adm1021
+++ b/Documentation/hwmon/adm1021
@@ -14,10 +14,6 @@ Supported chips:
     Prefix: 'gl523sm'
     Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
     Datasheet:
-  * Intel Xeon Processor
-    Prefix: - any other - may require 'force_adm1021' parameter
-    Addresses scanned: none
-    Datasheet: Publicly available at Intel website
   * Maxim MAX1617
     Prefix: 'max1617'
     Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
@@ -91,21 +87,27 @@ will do no harm, but will return 'old' values. It is possible to make
 ADM1021-clones do faster measurements, but there is really no good reason
 for that.
 
-Xeon support
-------------
 
-Some Xeon processors have real max1617, adm1021, or compatible chips
-within them, with two temperature sensors.
+Netburst-based Xeon support
+---------------------------
 
-Other Xeons have chips with only one sensor.
+Some Xeon processors based on the Netburst (early Pentium 4, from 2001 to
+2003) microarchitecture had real MAX1617, ADM1021, or compatible chips
+within them, with two temperature sensors. Other Xeon processors of this
+era (with 400 MHz FSB) had chips with only one temperature sensor.
 
-If you have a Xeon, and the adm1021 module loads, and both temperatures
-appear valid, then things are good.
+If you have such an old Xeon, and you get two valid temperatures when
+loading the adm1021 module, then things are good.
 
-If the adm1021 module doesn't load, you should try this:
-	modprobe adm1021 force_adm1021=BUS,ADDRESS
-	ADDRESS can only be 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e.
+If nothing happens when loading the adm1021 module, and you are certain
+that your specific Xeon processor model includes compatible sensors, you
+will have to explicitly instantiate the sensor chips from user-space. See
+method 4 in Documentation/i2c/instantiating-devices. Possible slave
+addresses are 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e. It is likely that
+only temp2 will be correct and temp1 will have to be ignored.
 
-If you have dual Xeons you may have appear to have two separate
-adm1021-compatible chips, or two single-temperature sensors, at distinct
-addresses.
+Previous generations of the Xeon processor (based on Pentium II/III)
+didn't have these sensors. Next generations of Xeon processors (533 MHz
+FSB and faster) lost them, until the Core-based generation which
+introduced integrated digital thermal sensors. These are supported by
+the coretemp driver.
diff --git a/Documentation/hwmon/lm90 b/Documentation/hwmon/lm90
index fa475c0a48a3..f3efd18e87f4 100644
--- a/Documentation/hwmon/lm90
+++ b/Documentation/hwmon/lm90
@@ -32,6 +32,16 @@ Supported chips:
     Addresses scanned: I2C 0x4c and 0x4d
     Datasheet: Publicly available at the ON Semiconductor website
                http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461
+  * Analog Devices ADT7461A
+    Prefix: 'adt7461a'
+    Addresses scanned: I2C 0x4c and 0x4d
+    Datasheet: Publicly available at the ON Semiconductor website
+               http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461A
+  * ON Semiconductor NCT1008
+    Prefix: 'nct1008'
+    Addresses scanned: I2C 0x4c and 0x4d
+    Datasheet: Publicly available at the ON Semiconductor website
+               http://www.onsemi.com/PowerSolutions/product.do?id=NCT1008
   * Maxim MAX6646
     Prefix: 'max6646'
     Addresses scanned: I2C 0x4d
@@ -149,7 +159,7 @@ ADM1032:
   * ALERT is triggered by open remote sensor.
   * SMBus PEC support for Write Byte and Receive Byte transactions.
 
-ADT7461:
+ADT7461, ADT7461A, NCT1008:
   * Extended temperature range (breaks compatibility)
   * Lower resolution for remote temperature
 
@@ -195,9 +205,9 @@ are exported, one for each channel, but these values are of course linked.
 Only the local hysteresis can be set from user-space, and the same delta
 applies to the remote hysteresis.
 
-The lm90 driver will not update its values more frequently than every
-other second; reading them more often will do no harm, but will return
-'old' values.
+The lm90 driver will not update its values more frequently than configured with
+the update_interval attribute; reading them more often will do no harm, but will
+return 'old' values.
 
 SMBus Alert Support
 -------------------
@@ -205,11 +215,12 @@ SMBus Alert Support
 This driver has basic support for SMBus alert. When an alert is received,
 the status register is read and the faulty temperature channel is logged.
 
-The Analog Devices chips (ADM1032 and ADT7461) do not implement the SMBus
-alert protocol properly so additional care is needed: the ALERT output is
-disabled when an alert is received, and is re-enabled only when the alarm
-is gone. Otherwise the chip would block alerts from other chips in the bus
-as long as the alarm is active.
+The Analog Devices chips (ADM1032, ADT7461 and ADT7461A) and ON
+Semiconductor chips (NCT1008) do not implement the SMBus alert protocol
+properly so additional care is needed: the ALERT output is disabled when
+an alert is received, and is re-enabled only when the alarm is gone.
+Otherwise the chip would block alerts from other chips in the bus as long
+as the alarm is active.
 
 PEC Support
 -----------
diff --git a/Documentation/hwmon/max16064 b/Documentation/hwmon/max16064
new file mode 100644
index 000000000000..41728999e142
--- /dev/null
+++ b/Documentation/hwmon/max16064
@@ -0,0 +1,62 @@
+Kernel driver max16064
+======================
+
+Supported chips:
+  * Maxim MAX16064
+    Prefix: 'max16064'
+    Addresses scanned: -
+    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf
+
+Author: Guenter Roeck <guenter.roeck@ericsson.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for Maxim MAX16064 Quad Power-Supply
+Controller with Active-Voltage Output Control and PMBus Interface.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in[1-4]_label		"vout[1-4]"
+in[1-4]_input		Measured voltage. From READ_VOUT register.
+in[1-4]_min		Minimum voltage. From VOUT_UV_WARN_LIMIT register.
+in[1-4]_max		Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[1-4]_lcrit		Critical minimum voltage. From VOUT_UV_FAULT_LIMIT register.
+in[1-4]_crit		Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[1-4]_min_alarm	Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[1-4]_max_alarm	Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[1-4]_lcrit_alarm	Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[1-4]_crit_alarm	Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+
+temp1_input		Measured temperature. From READ_TEMPERATURE_1 register.
+temp1_max		Maximum temperature. From OT_WARN_LIMIT register.
+temp1_crit		Critical high temperature. From OT_FAULT_LIMIT register.
+temp1_max_alarm		Chip temperature high alarm. Set by comparing
+			READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING
+			status is set.
+temp1_crit_alarm	Chip temperature critical high alarm. Set by comparing
+			READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT
+			status is set.
diff --git a/Documentation/hwmon/max34440 b/Documentation/hwmon/max34440
new file mode 100644
index 000000000000..6c525dd07d59
--- /dev/null
+++ b/Documentation/hwmon/max34440
@@ -0,0 +1,79 @@
+Kernel driver max34440
+======================
+
+Supported chips:
+  * Maxim MAX34440
+    Prefixes: 'max34440'
+    Addresses scanned: -
+    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34440.pdf
+  * Maxim MAX34441
+    PMBus 5-Channel Power-Supply Manager and Intelligent Fan Controller
+    Prefixes: 'max34441'
+    Addresses scanned: -
+    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34441.pdf
+
+Author: Guenter Roeck <guenter.roeck@ericsson.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for Maxim MAX34440 PMBus 6-Channel
+Power-Supply Manager and MAX34441 PMBus 5-Channel Power-Supply Manager
+and Intelligent Fan Controller.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in[1-6]_label		"vout[1-6]".
+in[1-6]_input		Measured voltage. From READ_VOUT register.
+in[1-6]_min		Minimum voltage. From VOUT_UV_WARN_LIMIT register.
+in[1-6]_max		Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[1-6]_lcrit		Critical minimum voltage. From VOUT_UV_FAULT_LIMIT register.
+in[1-6]_crit		Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[1-6]_min_alarm	Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[1-6]_max_alarm	Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[1-6]_lcrit_alarm	Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[1-6]_crit_alarm	Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+
+curr[1-6]_label		"iout[1-6]".
+curr[1-6]_input		Measured current. From READ_IOUT register.
+curr[1-6]_max		Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr[1-6]_crit		Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr[1-6]_max_alarm	Current high alarm. From IOUT_OC_WARNING status.
+curr[1-6]_crit_alarm	Current critical high alarm. From IOUT_OC_FAULT status.
+
+			in6 and curr6 attributes only exist for MAX34440.
+
+temp[1-8]_input		Measured temperatures. From READ_TEMPERATURE_1 register.
+			temp1 is the chip's internal temperature. temp2..temp5
+			are remote I2C temperature sensors. For MAX34441, temp6
+			is a remote thermal-diode sensor. For MAX34440, temp6..8
+			are remote I2C temperature sensors.
+temp[1-8]_max		Maximum temperature. From OT_WARN_LIMIT register.
+temp[1-8]_crit		Critical high temperature. From OT_FAULT_LIMIT register.
+temp[1-8]_max_alarm	Temperature high alarm.
+temp[1-8]_crit_alarm	Temperature critical high alarm.
+
+			temp7 and temp8 attributes only exist for MAX34440.
diff --git a/Documentation/hwmon/max8688 b/Documentation/hwmon/max8688
new file mode 100644
index 000000000000..0ddd3a412030
--- /dev/null
+++ b/Documentation/hwmon/max8688
@@ -0,0 +1,69 @@
+Kernel driver max8688
+=====================
+
+Supported chips:
+  * Maxim MAX8688
+    Prefix: 'max8688'
+    Addresses scanned: -
+    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf
+
+Author: Guenter Roeck <guenter.roeck@ericsson.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for Maxim MAX8688 Digital Power-Supply
+Controller/Monitor with PMBus Interface.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in1_label		"vout1"
+in1_input		Measured voltage. From READ_VOUT register.
+in1_min			Minimum voltage. From VOUT_UV_WARN_LIMIT register.
+in1_max			Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in1_lcrit		Critical minimum voltage. From VOUT_UV_FAULT_LIMIT register.
+in1_crit		Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in1_min_alarm		Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in1_max_alarm		Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in1_lcrit_alarm		Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in1_crit_alarm		Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+
+curr1_label		"iout1"
+curr1_input		Measured current. From READ_IOUT register.
+curr1_max		Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr1_crit		Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr1_max_alarm		Current high alarm. From IOUT_OC_WARN_LIMIT register.
+curr1_crit_alarm	Current critical high alarm. From IOUT_OC_FAULT status.
+
+temp1_input		Measured temperature. From READ_TEMPERATURE_1 register.
+temp1_max		Maximum temperature. From OT_WARN_LIMIT register.
+temp1_crit		Critical high temperature. From OT_FAULT_LIMIT register.
+temp1_max_alarm		Chip temperature high alarm. Set by comparing
+			READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING
+			status is set.
+temp1_crit_alarm	Chip temperature critical high alarm. Set by comparing
+			READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT
+			status is set.
diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus
index dc4933e96344..5e462fc7f99b 100644
--- a/Documentation/hwmon/pmbus
+++ b/Documentation/hwmon/pmbus
@@ -13,26 +13,6 @@ Supported chips:
     Prefix: 'ltc2978'
     Addresses scanned: -
     Datasheet: http://cds.linear.com/docs/Datasheet/2978fa.pdf
-  * Maxim MAX16064
-    Quad Power-Supply Controller
-    Prefix: 'max16064'
-    Addresses scanned: -
-    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf
-  * Maxim MAX34440
-    PMBus 6-Channel Power-Supply Manager
-    Prefixes: 'max34440'
-    Addresses scanned: -
-    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34440.pdf
-  * Maxim MAX34441
-    PMBus 5-Channel Power-Supply Manager and Intelligent Fan Controller
-    Prefixes: 'max34441'
-    Addresses scanned: -
-    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34441.pdf
-  * Maxim MAX8688
-    Digital Power-Supply Controller/Monitor
-    Prefix: 'max8688'
-    Addresses scanned: -
-    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf
   * Generic PMBus devices
     Prefix: 'pmbus'
     Addresses scanned: -
@@ -175,11 +155,13 @@ currX_crit		Critical maximum current.
 			From IIN_OC_FAULT_LIMIT or IOUT_OC_FAULT_LIMIT register.
 currX_alarm		Current high alarm.
 			From IIN_OC_WARNING or IOUT_OC_WARNING status.
+currX_max_alarm		Current high alarm.
+			From IIN_OC_WARN_LIMIT or IOUT_OC_WARN_LIMIT status.
 currX_lcrit_alarm	Output current critical low alarm.
 			From IOUT_UC_FAULT status.
 currX_crit_alarm	Current critical high alarm.
 			From IIN_OC_FAULT or IOUT_OC_FAULT status.
-currX_label		"iin" or "vinY"
+currX_label		"iin" or "ioutY"
 
 powerX_input		Measured power. From READ_PIN or READ_POUT register.
 powerX_cap		Output power cap. From POUT_MAX register.
@@ -193,13 +175,13 @@ powerX_crit_alarm	Output power critical high alarm.
 			From POUT_OP_FAULT status.
 powerX_label		"pin" or "poutY"
 
-tempX_input		Measured tempererature.
+tempX_input		Measured temperature.
 			From READ_TEMPERATURE_X register.
-tempX_min		Mimimum tempererature. From UT_WARN_LIMIT register.
-tempX_max		Maximum tempererature. From OT_WARN_LIMIT register.
-tempX_lcrit		Critical low tempererature.
+tempX_min		Minimum temperature. From UT_WARN_LIMIT register.
+tempX_max		Maximum temperature. From OT_WARN_LIMIT register.
+tempX_lcrit		Critical low temperature.
 			From UT_FAULT_LIMIT register.
-tempX_crit		Critical high tempererature.
+tempX_crit		Critical high temperature.
 			From OT_FAULT_LIMIT register.
 tempX_min_alarm		Chip temperature low alarm. Set by comparing
 			READ_TEMPERATURE_X with UT_WARN_LIMIT if
diff --git a/Documentation/hwmon/smm665 b/Documentation/hwmon/smm665
index 3820fc9ca52d..59e316140542 100644
--- a/Documentation/hwmon/smm665
+++ b/Documentation/hwmon/smm665
@@ -150,8 +150,8 @@ in8_crit_alarm		Channel F critical alarm
 in9_crit_alarm		AIN1 critical alarm
 in10_crit_alarm		AIN2 critical alarm
 
-temp1_input		Chip tempererature
-temp1_min		Mimimum chip tempererature
-temp1_max		Maximum chip tempererature
-temp1_crit		Critical chip tempererature
+temp1_input		Chip temperature
+temp1_min		Minimum chip temperature
+temp1_max		Maximum chip temperature
+temp1_crit		Critical chip temperature
 temp1_crit_alarm	Temperature critical alarm
diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches
new file mode 100644
index 000000000000..86f42e8e9e49
--- /dev/null
+++ b/Documentation/hwmon/submitting-patches
@@ -0,0 +1,109 @@
+	How to Get Your Patch Accepted Into the Hwmon Subsystem
+	-------------------------------------------------------
+
+This text is a collection of suggestions for people writing patches or
+drivers for the hwmon subsystem. Following these suggestions will greatly
+increase the chances of your change being accepted.
+
+
+1. General
+----------
+
+* It should be unnecessary to mention, but please read and follow
+    Documentation/SubmitChecklist
+    Documentation/SubmittingDrivers
+    Documentation/SubmittingPatches
+    Documentation/CodingStyle
+
+* If your patch generates checkpatch warnings, please refrain from explanations
+  such as "I don't like that coding style". Keep in mind that each unnecessary
+  warning helps hide a real problem. If you don't like the kernel coding
+  style, don't write kernel drivers.
+
+* Please test your patch thoroughly. We are not your test group.
+  Sometimes a patch cannot be tested, or cannot be tested completely, because
+  of missing hardware. In such cases, you should test-build the code on at
+  least one architecture. If run-time testing was not achieved, it should be
+  written explicitly below the patch header.
+
+* If your patch (or the driver) is affected by configuration options such as
+  CONFIG_SMP or CONFIG_HOTPLUG, make sure it compiles for all configuration
+  variants.
+
+
+2. Adding functionality to existing drivers
+-------------------------------------------
+
+* Make sure the documentation in Documentation/hwmon/<driver_name> is up to
+  date.
+
+* Make sure the information in Kconfig is up to date.
+
+* If the added functionality requires some cleanup or structural changes, split
+  your patch into a cleanup part and the actual addition. This makes it easier
+  to review your changes, and to bisect any resulting problems.
+
+* Never mix bug fixes, cleanup, and functional enhancements in a single patch.
+
+
+3. New drivers
+--------------
+
+* Running your patch or driver file(s) through checkpatch does not mean its
+  formatting is clean. If unsure about formatting in your new driver, run it
+  through Lindent. Lindent is not perfect, and you may have to do some minor
+  cleanup, but it is a good start.
+
+* Consider adding yourself to MAINTAINERS.
+
+* Document the driver in Documentation/hwmon/<driver_name>.
+
+* Add the driver to Kconfig and Makefile in alphabetical order.
+
+* Make sure that all dependencies are listed in Kconfig. For new drivers, it
+  is most likely prudent to add a dependency on EXPERIMENTAL.
+
+* Avoid forward declarations if you can. Rearrange the code if necessary.
+
+* Avoid calculations in macros and macro-generated functions. While such macros
+  may save a line or so in the source, it obfuscates the code and makes code
+  review more difficult. It may also result in code which is more complicated
+  than necessary. Use inline functions or just regular functions instead.
+
+* If the driver has a detect function, make sure it is silent. Debug messages
+  and messages printed after a successful detection are acceptable, but it
+  must not print messages such as "Chip XXX not found/supported".
+
+  Keep in mind that the detect function will run for all drivers supporting an
+  address if a chip is detected on that address. Unnecessary messages will just
+  pollute the kernel log and not provide any value.
+
+* Provide a detect function if and only if a chip can be detected reliably.
+
+* Avoid writing to chip registers in the detect function. If you have to write,
+  only do it after you have already gathered enough data to be certain that the
+  detection is going to be successful.
+
+  Keep in mind that the chip might not be what your driver believes it is, and
+  writing to it might cause a bad misconfiguration.
+
+* Make sure there are no race conditions in the probe function. Specifically,
+  completely initialize your chip first, then create sysfs entries and register
+  with the hwmon subsystem.
+
+* Do not provide support for deprecated sysfs attributes.
+
+* Do not create non-standard attributes unless really needed. If you have to use
+  non-standard attributes, or you believe you do, discuss it on the mailing list
+  first. In either case, provide a detailed explanation why you need the
+  non-standard attribute(s).
+  Standard attributes are specified in Documentation/hwmon/sysfs-interface.
+
+* When deciding which sysfs attributes to support, look at the chip's
+  capabilities. While we do not expect your driver to support everything the
+  chip may offer, it should at least support all limits and alarms.
+
+* Last but not least, please check if a driver for your chip already exists
+  before starting to write a new driver. Especially for temperature sensors,
+  new chips are often variants of previously released chips. In some cases,
+  a presumably new chip may simply have been relabeled.
diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt
new file mode 100644
index 000000000000..23fcb05175be
--- /dev/null
+++ b/Documentation/input/event-codes.txt
@@ -0,0 +1,262 @@
+The input protocol uses a map of types and codes to express input device values
+to userspace. This document describes the types and codes and how and when they
+may be used.
+
+A single hardware event generates multiple input events. Each input event
+contains the new value of a single data item. A special event type, EV_SYN, is
+used to separate input events into packets of input data changes occurring at
+the same moment in time. In the following, the term "event" refers to a single
+input event encompassing a type, code, and value.
+
+The input protocol is a stateful protocol. Events are emitted only when values
+of event codes have changed. However, the state is maintained within the Linux
+input subsystem; drivers do not need to maintain the state and may attempt to
+emit unchanged values without harm. Userspace may obtain the current state of
+event code values using the EVIOCG* ioctls defined in linux/input.h. The event
+reports supported by a device are also provided by sysfs in
+class/input/event*/device/capabilities/, and the properties of a device are
+provided in class/input/event*/device/properties.
+
+Types:
+==========
+Types are groupings of codes under a logical input construct. Each type has a
+set of applicable codes to be used in generating events. See the Codes section
+for details on valid codes for each type.
+
+* EV_SYN:
+  - Used as markers to separate events. Events may be separated in time or in
+    space, such as with the multitouch protocol.
+
+* EV_KEY:
+  - Used to describe state changes of keyboards, buttons, or other key-like
+    devices.
+
+* EV_REL:
+  - Used to describe relative axis value changes, e.g. moving the mouse 5 units
+    to the left.
+
+* EV_ABS:
+  - Used to describe absolute axis value changes, e.g. describing the
+    coordinates of a touch on a touchscreen.
+
+* EV_MSC:
+  - Used to describe miscellaneous input data that do not fit into other types.
+
+* EV_SW:
+  - Used to describe binary state input switches.
+
+* EV_LED:
+  - Used to turn LEDs on devices on and off.
+
+* EV_SND:
+  - Used to output sound to devices.
+
+* EV_REP:
+  - Used for autorepeating devices.
+
+* EV_FF:
+  - Used to send force feedback commands to an input device.
+
+* EV_PWR:
+  - A special type for power button and switch input.
+
+* EV_FF_STATUS:
+  - Used to receive force feedback device status.
+
+Codes:
+==========
+Codes define the precise type of event.
+
+EV_SYN:
+----------
+EV_SYN event values are undefined. Their usage is defined only by when they are
+sent in the evdev event stream.
+
+* SYN_REPORT:
+  - Used to synchronize and separate events into packets of input data changes
+    occurring at the same moment in time. For example, motion of a mouse may set
+    the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next
+    motion will emit more REL_X and REL_Y values and send another SYN_REPORT.
+
+* SYN_CONFIG:
+  - TBD
+
+* SYN_MT_REPORT:
+  - Used to synchronize and separate touch events. See the
+    multi-touch-protocol.txt document for more information.
+
+* SYN_DROPPED:
+  - Used to indicate buffer overrun in the evdev client's event queue.
+    Client should ignore all events up to and including next SYN_REPORT
+    event and query the device (using EVIOCG* ioctls) to obtain its
+    current state.
+
+EV_KEY:
+----------
+EV_KEY events take the form KEY_<name> or BTN_<name>. For example, KEY_A is used
+to represent the 'A' key on a keyboard. When a key is depressed, an event with
+the key's code is emitted with value 1. When the key is released, an event is
+emitted with value 0. Some hardware sends events when a key is repeated. These
+events have a value of 2. In general, KEY_<name> is used for keyboard keys, and
+BTN_<name> is used for other types of momentary switch events.
+
+A few EV_KEY codes have special meanings:
+
+* BTN_TOOL_<name>:
+  - These codes are used in conjunction with input trackpads, tablets, and
+    touchscreens. These devices may be used with fingers, pens, or other tools.
+    When an event occurs and a tool is used, the corresponding BTN_TOOL_<name>
+    code should be set to a value of 1. When the tool is no longer interacting
+    with the input device, the BTN_TOOL_<name> code should be reset to 0. All
+    trackpads, tablets, and touchscreens should use at least one BTN_TOOL_<name>
+    code when events are generated.
+
+* BTN_TOUCH:
+  - BTN_TOUCH is used for touch contact. While an input tool is determined to be
+    within meaningful physical contact, the value of this property must be set
+    to 1. Meaningful physical contact may mean any contact, or it may mean
+    contact conditioned by an implementation defined property. For example, a
+    touchpad may set the value to 1 only when the touch pressure rises above a
+    certain value. BTN_TOUCH may be combined with BTN_TOOL_<name> codes. For
+    example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the
+    pen is hovering over but not touching the tablet surface.
+
+Note: For appropriate function of the legacy mousedev emulation driver,
+BTN_TOUCH must be the first evdev code emitted in a synchronization frame.
+
+Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was
+interpreted as a touchpad by userspace, while a similar device without
+BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility
+with current userspace it is recommended to follow this distinction. In the
+future, this distinction will be deprecated and the device properties ioctl
+EVIOCGPROP, defined in linux/input.h, will be used to convey the device type.
+
+* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP:
+  - These codes denote one, two, three, and four finger interaction on a
+    trackpad or touchscreen. For example, if the user uses two fingers and moves
+    them on the touchpad in an effort to scroll content on screen,
+    BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion.
+    Note that all BTN_TOOL_<name> codes and the BTN_TOUCH code are orthogonal in
+    purpose. A trackpad event generated by finger touches should generate events
+    for one code from each group. At most only one of these BTN_TOOL_<name>
+    codes should have a value of 1 during any synchronization frame.
+
+Note: Historically some drivers emitted multiple of the finger count codes with
+a value of 1 in the same synchronization frame. This usage is deprecated.
+
+Note: In multitouch drivers, the input_mt_report_finger_count() function should
+be used to emit these codes. Please see multi-touch-protocol.txt for details.
+
+EV_REL:
+----------
+EV_REL events describe relative changes in a property. For example, a mouse may
+move to the left by a certain number of units, but its absolute position in
+space is unknown. If the absolute position is known, EV_ABS codes should be used
+instead of EV_REL codes.
+
+A few EV_REL codes have special meanings:
+
+* REL_WHEEL, REL_HWHEEL:
+  - These codes are used for vertical and horizontal scroll wheels,
+    respectively.
+
+EV_ABS:
+----------
+EV_ABS events describe absolute changes in a property. For example, a touchpad
+may emit coordinates for a touch location.
+
+A few EV_ABS codes have special meanings:
+
+* ABS_DISTANCE:
+  - Used to describe the distance of a tool from an interaction surface. This
+    event should only be emitted while the tool is hovering, meaning in close
+    proximity of the device and while the value of the BTN_TOUCH code is 0. If
+    the input device may be used freely in three dimensions, consider ABS_Z
+    instead.
+
+* ABS_MT_<name>:
+  - Used to describe multitouch input events. Please see
+    multi-touch-protocol.txt for details.
+
+EV_SW:
+----------
+EV_SW events describe stateful binary switches. For example, the SW_LID code is
+used to denote when a laptop lid is closed.
+
+Upon binding to a device or resuming from suspend, a driver must report
+the current switch state. This ensures that the device, kernel, and userspace
+state is in sync.
+
+Upon resume, if the switch state is the same as before suspend, then the input
+subsystem will filter out the duplicate switch state reports. The driver does
+not need to keep the state of the switch at any time.
+
+EV_MSC:
+----------
+EV_MSC events are used for input and output events that do not fall under other
+categories.
+
+EV_LED:
+----------
+EV_LED events are used for input and output to set and query the state of
+various LEDs on devices.
+
+EV_REP:
+----------
+EV_REP events are used for specifying autorepeating events.
+
+EV_SND:
+----------
+EV_SND events are used for sending sound commands to simple sound output
+devices.
+
+EV_FF:
+----------
+EV_FF events are used to initialize a force feedback capable device and to cause
+such device to feedback.
+
+EV_PWR:
+----------
+EV_PWR events are a special type of event used specifically for power
+management. Its usage is not well defined. To be addressed later.
+
+Guidelines:
+==========
+The guidelines below ensure proper single-touch and multi-finger functionality.
+For multi-touch functionality, see the multi-touch-protocol.txt document for
+more information.
+
+Mice:
+----------
+REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report
+the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report
+further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report
+scroll wheel events where available.
+
+Touchscreens:
+----------
+ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be
+used to report when a touch is active on the screen.
+BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch
+contact. BTN_TOOL_<name> events should be reported where possible.
+
+Trackpads:
+----------
+Legacy trackpads that only provide relative position information must report
+events like mice described above.
+
+Trackpads that provide absolute touch position must report ABS_{X,Y} for the
+location of the touch. BTN_TOUCH should be used to report when a touch is active
+on the trackpad. Where multi-finger support is available, BTN_TOOL_<name> should
+be used to report the number of touches active on the trackpad.
+
+Tablets:
+----------
+BTN_TOOL_<name> events must be reported when a stylus or other tool is active on
+the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH
+should be used to report when the tool is in contact with the tablet.
+BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any
+button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}.
+BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use
+meaningful buttons, like BTN_FORWARD, unless the button is labeled for that
+purpose on the device.
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index b63301a03811..050d37fe6d40 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a
 fork. So if you have any comments or updates for this file, please try
 to update the original English file first.
 
-Last Updated: 2008/10/24
+Last Updated: 2011/03/31
 ==================================
 これは、
-linux-2.6.28/Documentation/HOWTO
+linux-2.6.38/Documentation/HOWTO
 の和訳です。
 
 翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
-翻訳日： 2008/10/24
+翻訳日： 2011/3/28
 翻訳者： Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
 校正者： 松倉さん <nbh--mats at nifty dot com>
          小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
@@ -256,8 +256,8 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン�
   - メインの 2.6.x カーネルツリー
   - 2.6.x.y -stable カーネルツリー
   - 2.6.x -git カーネルパッチ
-  - 2.6.x -mm カーネルパッチ
   - サブシステム毎のカーネルツリーとパッチ
+  - 統合テストのための 2.6.x -next カーネルツリー
 
 2.6.x カーネルツリー
 -----------------
@@ -268,9 +268,9 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン�
 
   - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
     この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
-    このような差分は通常 -mm カーネルに数週間含まれてきたパッチです。
+    このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
     大きな変更は git(カーネルのソース管理ツール、詳細は
-    http://git.or.cz/  参照) を使って送るのが好ましいやり方ですが、パッ
+    http://git-scm.com/  参照) を使って送るのが好ましいやり方ですが、パッ
     チファイルの形式のまま送るのでも十分です。
 
   - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
@@ -333,86 +333,44 @@ git リポジトリで管理されているLinus のカーネルツリーの毎�
 れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的
 に生成されるので、より実験的です。
 
-2.6.x -mm カーネルパッチ
-------------------------
-
-Andrew Morton によってリリースされる実験的なカーネルパッチ群です。
-Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて
-linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま
-とめます。
-このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ
-が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、
-メインラインへ入れるように Linus にプッシュします。
-
-メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ
-チが -mm ツリーでテストされることが強く推奨されています。マージウィン
-ドウが開く前に -mm ツリーに現れなかったパッチはメインラインにマージさ
-れることは困難になります。
-
-これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ
-りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。
-
-もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、
-どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ
-れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ
-ストにフィードバックを提供してください。
-
-すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で
-メインラインの -git カーネルに含まれる全ての変更も含んでいます。
-
--mm カーネルは決まったスケジュールではリリースされません、しかし通常幾
-つかの -mm カーネル (1 から 3 が普通）が各-rc カーネルの間にリリースさ
-れます。
-
 サブシステム毎のカーネルツリーとパッチ
 -------------------------------------------
 
-カーネルの様々な領域で何が起きているかを見られるようにするため、多くの
-カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの
-ツリーは説明したように -mm カーネルリリースに入れ込まれます。
-
-以下はさまざまなカーネルツリーの中のいくつかのリスト-
-
-  git ツリー-
-    - Kbuild の開発ツリー、Sam Ravnborg <sam@ravnborg.org>
-	git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
-
-    - ACPI の開発ツリー、 Len Brown <len.brown@intel.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
-
-    - Block の開発ツリー、Jens Axboe <axboe@suse.de>
-	git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
-
-    - DRM の開発ツリー、Dave Airlie <airlied@linux.ie>
-	git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
-
-    - ia64 の開発ツリー、Tony Luck <tony.luck@intel.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
-
-    - infiniband, Roland Dreier <rolandd@cisco.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
-
-    - libata, Jeff Garzik <jgarzik@pobox.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
-
-    - ネットワークドライバ, Jeff Garzik <jgarzik@pobox.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
-
-    - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
-	git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
-
-    - SCSI, James Bottomley <James.Bottomley@hansenpartnership.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
-
-    - x86, Ingo Molnar <mingo@elte.hu>
-	git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
-
-  quilt ツリー-
-    - USB, ドライバコアと I2C, Greg Kroah-Hartman <gregkh@suse.de>
-	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
+それぞれのカーネルサブシステムのメンテナ達は --- そして多くのカーネル
+サブシステムの開発者達も --- 各自の最新の開発状況をソースリポジトリに
+公開しています。そのため、自分とは異なる領域のカーネルで何が起きている
+かを他の人が見られるようになっています。開発が早く進んでいる領域では、
+開発者は自身の投稿がどのサブシステムカーネルツリーを元にしているか質問
+されるので、その投稿とすでに進行中の他の作業との衝突が避けられます。
+
+大部分のこれらのリポジトリは git ツリーです。しかしその他の SCM や
+quilt シリーズとして公開されているパッチキューも使われています。これら
+のサブシステムリポジトリのアドレスは MAINTAINERS ファイルにリストされ
+ています。これらの多くは http://git.kernel.org/ で参照することができま
+す。
 
-  その他のカーネルツリーは http://git.kernel.org/ と MAINTAINERS ファ
-  イルに一覧表があります。
+提案されたパッチがこのようなサブシステムツリーにコミットされる前に、メー
+リングリストで事前にレビューにかけられます（以下の対応するセクションを
+参照）。いくつかのカーネルサブシステムでは、このレビューは patchwork
+というツールによって追跡されます。Patchwork は web インターフェイスに
+よってパッチ投稿の表示、パッチへのコメント付けや改訂などができ、そして
+メンテナはパッチに対して、レビュー中、受付済み、拒否というようなマーク
+をつけることができます。大部分のこれらの patchwork のサイトは
+http://patchwork.kernel.org/ でリストされています。
+
+統合テストのための 2.6.x -next カーネルツリー
+---------------------------------------------
+
+サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ
+る前に、それらは統合テストされる必要があります。この目的のため、実質的
+に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ
+ポジトリが存在します-
+       http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+       http://linux.f-seidel.de/linux-next/pmwiki/
+
+このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン
+ラインカーネルにマージされるか、おおまかな展望を提供します。-next
+カーネルの実行テストを行う冒険好きなテスターは大いに歓迎されます。
 
 バグレポート
 -------------
@@ -673,10 +631,9 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を�
 じところからスタートしたのですから。
 
 Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process"
-(http://linux.tar.bz/articles/2.6-development_process)セクショ
-ンをこのテキストの原型にすることを許可してくれました。
-Rundy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ
-てはいけないことのリストを提供してくれました。
+(http://lwn.net/Articles/94386/) セクションをこのテキストの原型にする
+ことを許可してくれました。Randy Dunlap と Gerrit Huizenga はメーリング
+リストでやるべきこととやってはいけないことのリストを提供してくれました。
 以下の人々のレビュー、コメント、貢献に感謝。
 Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
 Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cc85a9278190..c603ef7b0568 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -245,7 +245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
-				  old_ordering, s4_nonvs, sci_force_enable }
+				  old_ordering, nonvs, sci_force_enable }
 			See Documentation/power/video.txt for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
@@ -1664,6 +1664,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
+	nosmep		[X86]
+			Disable SMEP (Supervisor Mode Execution Protection)
+			even if it is supported by the processor.
+
 	noexec32	[X86-64]
 			This affects only 32-bit executables.
 			noexec32=on: enable non-executable mappings (default)
diff --git a/Documentation/md.txt b/Documentation/md.txt
index a81c7b4790f2..2366b1c8cf19 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -552,6 +552,16 @@ also have
      within the array where IO will be blocked.  This is currently
      only supported for raid4/5/6.
 
+   sync_min
+   sync_max
+     The two values, given as numbers of sectors, indicate a range
+     within the array where 'check'/'repair' will operate. Must be
+     a multiple of chunk_size. When it reaches "sync_max" it will
+     pause, rather than complete.
+     You can use 'select' or 'poll' on "sync_completed" to wait for
+     that number to reach sync_max.  Then you can either increase
+     "sync_max", or can write 'idle' to "sync_action".
+
 
 Each active md device may also have attributes specific to the
 personality module that manages it.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 1971bcf48a60..88880839ece4 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -279,11 +279,15 @@ When the system goes into the standby or memory sleep state, the phases are:
 	time.)  Unlike the other suspend-related phases, during the prepare
 	phase the device tree is traversed top-down.
 
-	The prepare phase uses only a bus callback.  After the callback method
-	returns, no new children may be registered below the device.  The method
-	may also prepare the device or driver in some way for the upcoming
-	system power transition, but it should not put the device into a
-	low-power state.
+	In addition to that, if device drivers need to allocate additional
+	memory to be able to handle device suspend correctly, that should be
+	done in the prepare phase.
+
+	After the prepare callback method returns, no new children may be
+	registered below the device.  The method may also prepare the device or
+	driver in some way for the upcoming system power transition (for
+	example, by allocating additional memory required for this purpose), but
+	it should not put the device into a low-power state.
 
     2.	The suspend methods should quiesce the device to stop it from performing
 	I/O.  They also may save the device registers and put it into the
diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt
index cf980709122a..c2a4a346c0d9 100644
--- a/Documentation/power/notifiers.txt
+++ b/Documentation/power/notifiers.txt
@@ -1,46 +1,41 @@
 Suspend notifiers
-	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
-
-There are some operations that device drivers may want to carry out in their
-.suspend() routines, but shouldn't, because they can cause the hibernation or
-suspend to fail. For example, a driver may want to allocate a substantial amount
-of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
-swsusp's memory shrinker has run.
-
-Also, there may be some operations, that subsystems want to carry out before a
-hibernation/suspend or after a restore/resume, requiring the system to be fully
-functional, so the drivers' .suspend() and .resume() routines are not suitable
-for this purpose.  For example, device drivers may want to upload firmware to
-their devices after a restore from a hibernation image, but they cannot do it by
-calling request_firmware() from their .resume() routines (user land processes
-are frozen at this point).  The solution may be to load the firmware into
-memory before processes are frozen and upload it from there in the .resume()
-routine.  Of course, a hibernation notifier may be used for this purpose.
-
-The subsystems that have such needs can register suspend notifiers that will be
-called upon the following events by the suspend core:
+	(C) 2007-2011 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+There are some operations that subsystems or drivers may want to carry out
+before hibernation/suspend or after restore/resume, but they require the system
+to be fully functional, so the drivers' and subsystems' .suspend() and .resume()
+or even .prepare() and .complete() callbacks are not suitable for this purpose.
+For example, device drivers may want to upload firmware to their devices after
+resume/restore, but they cannot do it by calling request_firmware() from their
+.resume() or .complete() routines (user land processes are frozen at these
+points).  The solution may be to load the firmware into memory before processes
+are frozen and upload it from there in the .resume() routine.
+A suspend/hibernation notifier may be used for this purpose.
+
+The subsystems or drivers having such needs can register suspend notifiers that
+will be called upon the following events by the PM core:
 
 PM_HIBERNATION_PREPARE	The system is going to hibernate or suspend, tasks will
 			be frozen immediately.
 
 PM_POST_HIBERNATION	The system memory state has been restored from a
-			hibernation image or an error occurred during the
-			hibernation.  Device drivers' .resume() callbacks have
+			hibernation image or an error occurred during
+			hibernation.  Device drivers' restore callbacks have
 			been executed and tasks have been thawed.
 
 PM_RESTORE_PREPARE	The system is going to restore a hibernation image.
-			If all goes well the restored kernel will issue a
+			If all goes well, the restored kernel will issue a
 			PM_POST_HIBERNATION notification.
 
-PM_POST_RESTORE		An error occurred during the hibernation restore.
-			Device drivers' .resume() callbacks have been executed
+PM_POST_RESTORE		An error occurred during restore from hibernation.
+			Device drivers' restore callbacks have been executed
 			and tasks have been thawed.
 
-PM_SUSPEND_PREPARE	The system is preparing for a suspend.
+PM_SUSPEND_PREPARE	The system is preparing for suspend.
 
 PM_POST_SUSPEND		The system has just resumed or an error occurred during
-			the suspend.	Device drivers' .resume() callbacks have
-			been executed and tasks have been thawed.
+			suspend.  Device drivers' resume callbacks have been
+			executed and tasks have been thawed.
 
 It is generally assumed that whatever the notifiers do for
 PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION.  Analogously,
diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx
index 9e15b4f9cd28..19e7cd4bba66 100644
--- a/Documentation/scsi/LICENSE.qla2xxx
+++ b/Documentation/scsi/LICENSE.qla2xxx
@@ -1,11 +1,11 @@
-Copyright (c)  2003-2005 QLogic Corporation
-QLogic Linux Fibre Channel HBA Driver
+Copyright (c) 2003-2011 QLogic Corporation
+QLogic Linux/ESX Fibre Channel HBA Driver
 
-This program includes a device driver for Linux 2.6 that may be
+This program includes a device driver for Linux 2.6/ESX that may be
 distributed with QLogic hardware specific firmware binary file.
 You may modify and redistribute the device driver code under the
-GNU General Public License as published by the Free Software
-Foundation (version 2 or a later version).
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
 
 You may redistribute the hardware specific firmware binary file
 under the following terms:
@@ -43,3 +43,285 @@ OTHERWISE IN ANY INTELLECTUAL PROPERTY RIGHTS (PATENT, COPYRIGHT,
 TRADE SECRET, MASK WORK, OR OTHER PROPRIETARY RIGHT) EMBODIED IN
 ANY OTHER QLOGIC HARDWARE OR SOFTWARE EITHER SOLELY OR IN
 COMBINATION WITH THIS PROGRAM.
+
+
+EXHIBIT A
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/sound/alsa/SB-Live-mixer.txt b/Documentation/sound/alsa/SB-Live-mixer.txt
index f5639d40521d..f4b5988f450c 100644
--- a/Documentation/sound/alsa/SB-Live-mixer.txt
+++ b/Documentation/sound/alsa/SB-Live-mixer.txt
@@ -87,14 +87,14 @@ accumulator. ALSA uses accumulators 0 and 1 for left and right PCM.
 The result is forwarded to the ADC capture FIFO (thus to the standard capture
 PCM device).
 
-name='Music Playback Volume',index=0
+name='Synth Playback Volume',index=0
 
 This control is used to attenuate samples for left and right MIDI FX-bus
 accumulators. ALSA uses accumulators 4 and 5 for left and right MIDI samples.
 The result samples are forwarded to the front DAC PCM slots of the AC97 codec.
 
-name='Music Capture Volume',index=0
-name='Music Capture Switch',index=0
+name='Synth Capture Volume',index=0
+name='Synth Capture Switch',index=0
 
 These controls are used to attenuate samples for left and right MIDI FX-bus
 accumulator. ALSA uses accumulators 4 and 5 for left and right PCM.
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 6d27ab8d6e9f..c83bd6b4e6e8 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -120,7 +120,6 @@ format:
         field:unsigned char common_flags;       offset:2;       size:1; signed:0;
         field:unsigned char common_preempt_count;       offset:3; size:1;signed:0;
         field:int common_pid;   offset:4;       size:4; signed:1;
-        field:int common_lock_depth;    offset:8;       size:4; signed:1;
 
         field:unsigned long __probe_ip; offset:12;      size:4; signed:0;
         field:int __probe_nargs;        offset:16;      size:4; signed:1;
diff --git a/Documentation/video4linux/sh_mobile_ceu_camera.txt b/Documentation/video4linux/sh_mobile_ceu_camera.txt
index cb47e723af74..1e96ce6e2d2f 100644
--- a/Documentation/video4linux/sh_mobile_ceu_camera.txt
+++ b/Documentation/video4linux/sh_mobile_ceu_camera.txt
@@ -37,7 +37,7 @@ Generic scaling / cropping scheme
 -1'-
 
 In the above chart minuses and slashes represent "real" data amounts, points and
-accents represent "useful" data, basically, CEU scaled amd cropped output,
+accents represent "useful" data, basically, CEU scaled and cropped output,
 mapped back onto the client's source plane.
 
 Such a configuration can be produced by user requests:
@@ -65,7 +65,7 @@ Do not touch input rectangle - it is already optimal.
 
 1. Calculate current sensor scales:
 
-	scale_s = ((3') - (3)) / ((2') - (2))
+	scale_s = ((2') - (2)) / ((3') - (3))
 
 2. Calculate "effective" input crop (sensor subwindow) - CEU crop scaled back at
 current sensor scales onto input window - this is user S_CROP:
@@ -80,7 +80,7 @@ window:
 4. Calculate sensor output window by applying combined scales to real input
 window:
 
-	width_s_out = ((2') - (2)) / scale_comb
+	width_s_out = ((7') - (7)) = ((2') - (2)) / scale_comb
 
 5. Apply iterative sensor S_FMT for sensor output window.
 
diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX
new file mode 100644
index 000000000000..fe0251c4cfb7
--- /dev/null
+++ b/Documentation/virtual/00-INDEX
@@ -0,0 +1,10 @@
+Virtualization support in the Linux kernel.
+
+00-INDEX
+	- this file.
+kvm/
+	- Kernel Virtual Machine.  See also http://linux-kvm.org
+lguest/
+	- Extremely simple hypervisor for experimental/educational use.
+uml/
+	- User Mode Linux, builds/runs Linux kernel as a userspace program.
diff --git a/Documentation/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9bef4e4cec50..9bef4e4cec50 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 882068538c9c..882068538c9c 100644
--- a/Documentation/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
diff --git a/Documentation/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 3b4cd3bf5631..3b4cd3bf5631 100644
--- a/Documentation/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
diff --git a/Documentation/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index f46aa58389ca..f46aa58389ca 100644
--- a/Documentation/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
diff --git a/Documentation/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index d079aed27e03..d079aed27e03 100644
--- a/Documentation/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
diff --git a/Documentation/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 3ab969c59046..3ab969c59046 100644
--- a/Documentation/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
diff --git a/Documentation/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt
index 730475ae1b8d..a850986ed684 100644
--- a/Documentation/kvm/review-checklist.txt
+++ b/Documentation/virtual/kvm/review-checklist.txt
@@ -7,7 +7,7 @@ Review checklist for kvm patches
 2.  Patches should be against kvm.git master branch.
 
 3.  If the patch introduces or modifies a new userspace API:
-    - the API must be documented in Documentation/kvm/api.txt
+    - the API must be documented in Documentation/virtual/kvm/api.txt
     - the API must be discoverable using KVM_CHECK_EXTENSION
 
 4.  New state must include support for save/restore.
diff --git a/Documentation/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt
index df8946377cb6..df8946377cb6 100644
--- a/Documentation/kvm/timekeeping.txt
+++ b/Documentation/virtual/kvm/timekeeping.txt
diff --git a/Documentation/lguest/.gitignore b/Documentation/virtual/lguest/.gitignore
index 115587fd5f65..115587fd5f65 100644
--- a/Documentation/lguest/.gitignore
+++ b/Documentation/virtual/lguest/.gitignore
diff --git a/Documentation/lguest/Makefile b/Documentation/virtual/lguest/Makefile
index bebac6b4f332..bebac6b4f332 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/virtual/lguest/Makefile
diff --git a/Documentation/lguest/extract b/Documentation/virtual/lguest/extract
index 7730bb6e4b94..7730bb6e4b94 100644
--- a/Documentation/lguest/extract
+++ b/Documentation/virtual/lguest/extract
diff --git a/Documentation/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index d9da7e148538..d9da7e148538 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
diff --git a/Documentation/lguest/lguest.txt b/Documentation/virtual/lguest/lguest.txt
index dad99978a6a8..bff0c554485d 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/virtual/lguest/lguest.txt
@@ -74,7 +74,8 @@ Running Lguest:
 
 - Run an lguest as root:
 
-      Documentation/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/vda
+      Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
+        --block=rootfile root=/dev/vda
 
    Explanation:
     64: the amount of memory to use, in MB.
diff --git a/Documentation/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
index 9b7e1904db1c..9b7e1904db1c 100644
--- a/Documentation/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
index 01c513fac40e..a0b577de918f 100644
--- a/Documentation/workqueue.txt
+++ b/Documentation/workqueue.txt
@@ -12,6 +12,7 @@ CONTENTS
 4. Application Programming Interface (API)
 5. Example Execution Scenarios
 6. Guidelines
+7. Debugging
 
 
 1. Introduction
@@ -379,3 +380,42 @@ If q1 has WQ_CPU_INTENSIVE set,
 * Unless work items are expected to consume a huge amount of CPU
   cycles, using a bound wq is usually beneficial due to the increased
   level of locality in wq operations and work item execution.
+
+
+7. Debugging
+
+Because the work functions are executed by generic worker threads
+there are a few tricks needed to shed some light on misbehaving
+workqueue users.
+
+Worker threads show up in the process list as:
+
+root      5671  0.0  0.0      0     0 ?        S    12:07   0:00 [kworker/0:1]
+root      5672  0.0  0.0      0     0 ?        S    12:07   0:00 [kworker/1:2]
+root      5673  0.0  0.0      0     0 ?        S    12:12   0:00 [kworker/0:0]
+root      5674  0.0  0.0      0     0 ?        S    12:13   0:00 [kworker/1:0]
+
+If kworkers are going crazy (using too much cpu), there are two types
+of possible problems:
+
+	1. Something being scheduled in rapid succession
+	2. A single work item that consumes lots of cpu cycles
+
+The first one can be tracked using tracing:
+
+	$ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event
+	$ cat /sys/kernel/debug/tracing/trace_pipe > out.txt
+	(wait a few secs)
+	^C
+
+If something is busy looping on work queueing, it will dominate
+the output, and the offender can be identified by the work item
+function.
+
+For the second type of problem, it should be possible to just check
+the stack trace of the offending worker thread.
+
+	$ cat /proc/THE_OFFENDING_KWORKER/stack
+
+The work item's function should be trivially visible in the stack
+trace.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 092e596a1301..c54b4f503e2a 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -206,7 +206,7 @@ IOMMU (input/output memory management unit)
       (e.g. because you have < 3 GB memory).
       Kernel boot message: "PCI-DMA: Disabling IOMMU"
 
-   2. <arch/x86_64/kernel/pci-gart.c>: AMD GART based hardware IOMMU.
+   2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
       Kernel boot message: "PCI-DMA: using GART IOMMU"
 
    3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
diff --git a/Documentation/zh_CN/email-clients.txt b/Documentation/zh_CN/email-clients.txt
new file mode 100644
index 000000000000..5d65e323d060
--- /dev/null
+++ b/Documentation/zh_CN/email-clients.txt
@@ -0,0 +1,210 @@
+Chinese translated version of Documentation/email-clients.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly.  However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help.  Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
+---------------------------------------------------------------------
+Documentation/email-clients.txt 的中文翻译
+
+如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
+交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题，请联系中文版维护者。
+
+中文版维护者： 贾威威  Harry Wei <harryxiyou@gmail.com>
+中文版翻译者： 贾威威  Harry Wei <harryxiyou@gmail.com>
+中文版校译者： Yinglin Luan <synmyth@gmail.com>
+		Xiaochen Wang <wangxiaochen0@gmail.com>
+		yaxinsn <yaxinsn@163.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+Linux邮件客户端配置信息
+======================================================================
+
+普通配置
+----------------------------------------------------------------------
+Linux?????歌ˉ涓???????杩????浠惰?????浜ょ??锛????濂芥??琛ヤ??浣?涓洪??浠朵????????宓?????????????浜?缁存?よ??
+??ユ?堕??浠讹??浣???????浠剁?????瀹规?煎??搴?璇ユ??"text/plain"?????惰??锛????浠朵????????涓?璧???????锛?
+???涓鸿??浼?浣胯ˉ涓????寮???ㄩ?ㄥ????ㄨ??璁鸿??绋?涓???????寰???伴?俱??
+
+??ㄦ?ュ?????Linux?????歌ˉ涓???????浠跺?㈡?风????ㄥ?????琛ヤ????跺??璇ュ??浜?????????????濮???舵?????渚?濡?锛?
+浠?浠?涓???芥?瑰?????????????ゅ?惰〃绗???????绌烘?硷???????虫????ㄦ??涓?琛????寮?澶存?????缁?灏俱??
+
+涓?瑕????杩?"format=flowed"妯″????????琛ヤ?????杩???蜂??寮?璧蜂?????棰????浠ュ?????瀹崇?????琛????
+
+涓?瑕?璁╀????????浠跺?㈡?风??杩?琛??????ㄦ?㈣?????杩???蜂??浼???村??浣????琛ヤ?????
+
+???浠跺?㈡?风??涓???芥?瑰???????????瀛?绗????缂??????瑰?????瑕??????????琛ヤ???????芥??ASCII??????UTF-8缂??????瑰??锛?
+濡????浣?浣跨??UTF-8缂??????瑰???????????浠讹????ｄ??浣?灏?浼???垮??涓?浜??????藉????????瀛?绗???????棰????
+
+???浠跺?㈡?风??搴?璇ュ舰???骞朵??淇???? References: ?????? In-Reply-To: ???棰?锛???ｄ??
+???浠惰??棰?灏变??浼?涓???????
+
+澶???剁??甯?(?????????璐寸??甯?)???甯镐????界?ㄤ??琛ヤ??锛????涓哄?惰〃绗?浼?杞????涓虹┖??笺??浣跨??xclipboard, xclip
+??????xcutsel涔?璁稿??浠ワ??浣???????濂芥??璇?涓?涓?????????垮??浣跨?ㄥ????剁??甯????
+
+涓?瑕???ㄤ娇???PGP/GPG缃插????????浠朵腑??????琛ヤ?????杩???蜂??浣垮??寰?澶???????涓???借?诲??????????ㄤ??浣????琛ヤ?????
+锛?杩?涓????棰?搴?璇ユ?????浠ヤ慨澶????锛?
+
+??ㄧ???????搁??浠跺??琛ㄥ?????琛ヤ??涔????锛?缁????宸卞?????涓?涓?琛ヤ?????涓?涓???????涓绘??锛?淇?瀛???ユ?跺?扮??
+???浠讹??灏?琛ヤ?????'patch'??戒护???涓?锛?濡??????????浜?锛????缁??????搁??浠跺??琛ㄥ????????
+
+
+一些邮件客户端提示
+----------------------------------------------------------------------
+杩????缁???轰??浜?璇?缁????MUA???缃????绀猴?????浠ョ?ㄤ??缁?Linux?????稿?????琛ヤ?????杩?浜?骞朵???????虫??
+?????????杞?浠跺?????缃???荤?????
+
+说明：
+TUI = 以文本为基础的用户接口
+GUI = 图形界面用户接口
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Alpine (TUI)
+
+配置选项：
+在"Sending Preferences"部分：
+
+- "Do Not Send Flowed Text"必须开启
+- "Strip Whitespace Before Sending"必须关闭
+
+褰???????浠舵?讹????????搴?璇ユ?惧?ㄨˉ涓?浼???虹?扮????版?癸????跺?????涓?CTRL-R缁???????锛?浣挎??瀹????
+琛ヤ?????浠跺????ュ?伴??浠朵腑???
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Evolution (GUI)
+
+涓?浜?寮????????????????浣跨?ㄥ????????琛ヤ??
+
+褰??????╅??浠堕??椤癸??Preformat
+  浠?Format->Heading->Preformatted (Ctrl-7)??????宸ュ?锋??
+
+??跺??浣跨??锛?
+  Insert->Text File... (Alt-n x)?????ヨˉ涓????浠躲??
+
+浣?杩????浠?"diff -Nru old.c new.c | xclip"锛???????Preformat锛???跺??浣跨?ㄤ腑??撮??杩?琛?绮?甯????
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Kmail (GUI)
+
+涓?浜?寮????????????????浣跨?ㄥ????????琛ヤ?????
+
+榛?璁よ?剧疆涓?涓?HTML??煎??????????????锛?涓?瑕??????ㄥ?????
+
+褰?涔????涓?灏????浠剁????跺??锛???ㄩ??椤逛?????涓?瑕??????╄????ㄦ?㈣????????涓????缂虹?瑰氨???浣???ㄩ??浠朵腑杈???ョ??浠讳????????
+??戒??浼?琚??????ㄦ?㈣??锛????姝や??蹇?椤诲?ㄥ?????琛ヤ??涔?????????ㄦ?㈣????????绠?????????规??灏辨???????ㄨ????ㄦ?㈣????ヤ功??????浠讹??
+??跺?????瀹?淇?瀛?涓鸿??绋裤??涓????浣???ㄨ??绋夸腑???娆℃??寮?瀹?锛?瀹?宸茬????ㄩ?ㄨ????ㄦ?㈣??浜?锛???ｄ??浣???????浠惰?界?舵病???
+?????╄????ㄦ?㈣??锛?浣????杩?涓?浼?澶卞?诲凡???????????ㄦ?㈣?????
+
+??ㄩ??浠剁??搴????锛??????ヨˉ涓?涔????锛???句??甯哥?ㄧ??琛ヤ??瀹????绗?锛?涓?涓?杩?瀛????(---)???
+
+??跺?????"Message"????????＄??锛??????╂????ユ??浠讹????ョ????????浣????琛ヤ?????浠躲??杩????涓?涓?棰?澶???????椤癸??浣????浠?
+???杩?瀹????缃?浣???????浠跺缓绔?宸ュ?锋????????锛?杩????浠ュ甫涓?"insert file"??炬?????
+
+浣????浠ュ????ㄥ?伴??杩?GPG???璁伴??浠讹??浣???????宓?琛ヤ?????濂戒??瑕?浣跨??GPG???璁板??浠????浣?涓哄??宓??????????绛惧??琛ヤ??锛?
+褰?浠?GPG涓???????7浣?缂??????朵??浣夸??浠?????????村??澶???????
+
+濡????浣????瑕?浠ラ??浠剁??褰㈠????????琛ヤ??锛???ｄ??灏卞?抽????瑰?婚??浠讹????跺?????涓?灞???э??绐????"Suggest automatic
+display"锛?杩???峰??宓????浠舵?村?规??璁╄?昏???????般??
+
+褰?浣?瑕?淇?瀛?灏?瑕?????????????宓???????琛ヤ??锛?浣????浠ヤ??娑???????琛ㄧ????奸????╁?????琛ヤ????????浠讹????跺????冲?婚?????
+"save as"???浣????浠ヤ娇??ㄤ??涓?娌℃????存?圭????????琛ヤ????????浠讹??濡????瀹????浠ユ?ｇ‘???褰㈠??缁???????褰?浣?姝ｇ????ㄥ??
+???宸辩??绐???ｄ??涓?瀵????锛???ｆ?舵病??????椤瑰??浠ヤ??瀛????浠?--宸茬?????涓?涓?杩???风??bug琚?姹???ュ?颁??kmail???bugzilla
+骞朵??甯????杩?灏?浼?琚?澶??????????浠舵??浠ュ?????瀵规??涓???ㄦ?峰??璇诲???????????琚?淇?瀛????锛????浠ュ?????浣???虫?????浠跺????跺?板?朵????版?癸??
+浣?涓?寰?涓????浠?浠????????????逛负缁?????????翠?????璇汇??
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Lotus Notes (GUI)
+
+涓?瑕?浣跨?ㄥ?????
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Mutt (TUI)
+
+寰?澶?Linux寮????浜哄??浣跨??mutt瀹㈡?风??锛????浠ヨ?????瀹????瀹?宸ヤ????????甯告??浜????
+
+Mutt涓????甯?缂?杈????锛????浠ヤ??绠′??浣跨?ㄤ??涔?缂?杈???ㄩ?戒??搴?璇ュ甫????????ㄦ??琛????澶у????扮??杈???ㄩ?藉甫???
+涓?涓?"insert file"???椤癸??瀹????浠ラ??杩?涓???瑰?????浠跺??瀹圭????瑰???????ユ??浠躲??
+
+'vim'浣?涓?mutt???缂?杈????锛?
+  set editor="vi"
+
+  濡????浣跨??xclip锛???插?ヤ互涓???戒护
+  :set paste
+  ???涓????涔??????????shift-insert??????浣跨??
+  :r filename
+
+濡??????宠?????琛ヤ??浣?涓哄??宓??????????
+(a)ttach宸ヤ?????寰?濂斤??涓?甯????"set paste"???
+
+???缃????椤癸??
+瀹?搴?璇ヤ互榛?璁よ?剧疆???褰㈠??宸ヤ?????
+??惰??锛????"send_charset"璁剧疆涓?"us-ascii::utf-8"涔????涓?涓?涓???????涓绘?????
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Pine (TUI)
+
+Pine杩???绘??涓?浜?绌烘?煎????????棰?锛?浣????杩?浜???板?ㄥ??璇ラ?借??淇?澶?浜????
+
+濡???????浠ワ??璇蜂娇???alpine(pine???缁ф?胯??)
+
+???缃????椤癸??
+- ???杩?????????????瑕?娑???ゆ??绋???????
+- "no-strip-whitespace-before-send"???椤逛????????瑕???????
+
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sylpheed (GUI)
+
+- ???宓??????????浠ュ??濂界??宸ヤ??锛???????浣跨?ㄩ??浠讹?????
+- ???璁镐娇??ㄥ????ㄧ??缂?杈???ㄣ??
+- 瀵逛?????褰?杈?澶???堕??甯告?????
+- 濡???????杩?non-SSL杩???ワ?????娉?浣跨??TLS SMTP?????????
+- ??ㄧ?????绐???ｄ腑???涓?涓?寰??????ㄧ??ruler bar???
+- 缁???板?????涓?娣诲????板??灏变??浼?姝ｇ‘???浜?瑙ｆ?剧ず??????
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Thunderbird (GUI)
+
+榛?璁ゆ????典??锛?thunderbird寰?瀹规??????????????锛?浣????杩????涓?浜???规?????浠ュ己??跺?????寰???村ソ???
+
+- ??ㄧ?ㄦ?峰????疯?剧疆???锛?缁???????瀵诲??锛?涓?瑕???????"Compose messages in HTML format"???
+
+- 缂?杈?浣????Thunderbird???缃?璁剧疆??ヤ娇瀹?涓?瑕????琛?浣跨??锛?user_pref("mailnews.wraplength", 0);
+
+- 缂?杈?浣????Thunderbird???缃?璁剧疆锛?浣垮??涓?瑕?浣跨??"format=flowed"??煎??锛?user_pref("mailnews.
+  send_plaintext_flowed", false);
+
+- 浣????瑕?浣?Thunderbird???涓洪???????煎????瑰??锛?
+  濡????榛?璁ゆ????典??浣?涔??????????HTML??煎??锛???ｄ?????寰???俱??浠?浠?浠????棰???????涓????妗?涓???????"Preformat"??煎?????
+  濡????榛?璁ゆ????典??浣?涔??????????????????煎??锛?浣?涓?寰????瀹???逛负HTML??煎??锛?浠?浠?浣?涓轰??娆℃?х??锛???ヤ功?????扮??娑????锛?
+  ??跺??寮哄?朵娇瀹??????版???????煎??锛???????瀹?灏变?????琛????瑕?瀹???板??锛???ㄥ??淇＄????炬??涓?浣跨??shift?????ヤ娇瀹????涓?HTML
+  ??煎??锛???跺?????棰???????涓????妗?涓???????"Preformat"??煎?????
+
+- ???璁镐娇??ㄥ????ㄧ??缂?杈????锛?
+  ???瀵?Thunderbird???琛ヤ?????绠?????????规??灏辨??浣跨?ㄤ??涓?"external editor"??╁??锛???跺??浣跨?ㄤ????????娆㈢??
+  $EDITOR??ヨ?诲???????????骞惰ˉ涓???版?????涓????瑕?瀹???板??锛????浠ヤ??杞藉苟涓?瀹?瑁?杩?涓???╁??锛???跺??娣诲??涓?涓?浣跨?ㄥ?????
+  ??????View->Toolbars->Customize...??????褰?浣?涔????淇℃???????跺??浠?浠???瑰?诲??灏卞??浠ヤ?????
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+TkRat (GUI)
+
+???浠ヤ娇??ㄥ?????浣跨??"Insert file..."??????澶???ㄧ??缂?杈???ㄣ??
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Gmail (Web GUI)
+
+涓?瑕?浣跨?ㄥ????????琛ヤ?????
+
+Gmail缃?椤靛?㈡?风???????ㄥ?版????惰〃绗?杞????涓虹┖??笺??
+
+??界?跺?惰〃绗?杞????涓虹┖??奸??棰????浠ヨ??澶???ㄧ??杈???ㄨВ??筹???????跺??杩?浼?浣跨?ㄥ??杞???㈣?????姣?琛???????涓?78涓?瀛?绗????
+
+???涓?涓????棰????Gmail杩?浼????浠讳??涓????ASCII???瀛?绗????淇℃????逛负base64缂???????瀹????涓?瑗垮????????娆ф床浜虹?????瀛????
+
+                                ###
diff --git a/MAINTAINERS b/MAINTAINERS
index e23cbd16f1bd..49a0bf3a5b97 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -185,10 +185,9 @@ F:	Documentation/filesystems/9p.txt
 F:	fs/9p/
 
 A2232 SERIAL BOARD DRIVER
-M:	Enver Haase <A2232@gmx.net>
 L:	linux-m68k@lists.linux-m68k.org
-S:	Maintained
-F:	drivers/char/ser_a2232*
+S:	Orphan
+F:	drivers/staging/generic_serial/ser_a2232*
 
 AACRAID SCSI RAID DRIVER
 M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
@@ -406,8 +405,8 @@ S:	Maintained
 F:	sound/oss/aedsp16.c
 
 AFFS FILE SYSTEM
-M:	Roman Zippel <zippel@linux-m68k.org>
-S:	Maintained
+L:	linux-fsdevel@vger.kernel.org
+S:	Orphan
 F:	Documentation/filesystems/affs.txt
 F:	fs/affs/
 
@@ -878,6 +877,13 @@ F:	arch/arm/mach-mv78xx0/
 F:	arch/arm/mach-orion5x/
 F:	arch/arm/plat-orion/
 
+ARM/Orion SoC/Technologic Systems TS-78xx platform support
+M:	Alexander Clouter <alex@digriz.org.uk>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+W:	http://www.digriz.org.uk/ts78xx/kernel
+S:	Maintained
+F:	arch/arm/mach-orion5x/ts78xx-*
+
 ARM/MIOA701 MACHINE SUPPORT
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1026,12 +1032,13 @@ W:	http://www.fluff.org/ben/linux/
 S:	Maintained
 F:	arch/arm/mach-s3c64xx/
 
-ARM/S5P ARM ARCHITECTURES
+ARM/S5P EXYNOS ARM ARCHITECTURES
 M:	Kukjin Kim <kgene.kim@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-s5p*/
+F:	arch/arm/mach-exynos*/
 
 ARM/SAMSUNG MOBILE MACHINE SUPPORT
 M:	Kyungmin Park <kyungmin.park@samsung.com>
@@ -1064,7 +1071,7 @@ F:	arch/arm/mach-shmobile/
 F:	drivers/sh/
 
 ARM/TELECHIPS ARM ARCHITECTURE
-M:	"Hans J. Koch" <hjk@linutronix.de>
+M:	"Hans J. Koch" <hjk@hansjkoch.de>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/plat-tcc/
@@ -1817,11 +1824,10 @@ S:	Maintained
 F:	drivers/platform/x86/compal-laptop.c
 
 COMPUTONE INTELLIPORT MULTIPORT CARD
-M:	"Michael H. Warfield" <mhw@wittsend.com>
 W:	http://www.wittsend.com/computone.html
-S:	Maintained
+S:	Orphan
 F:	Documentation/serial/computone.txt
-F:	drivers/char/ip2/
+F:	drivers/staging/tty/ip2/
 
 CONEXANT ACCESSRUNNER USB DRIVER
 M:	Simon Arlott <cxacru@fire.lp0.eu>
@@ -2004,7 +2010,7 @@ F:	drivers/net/wan/cycx*
 CYCLADES ASYNC MUX DRIVER
 W:	http://www.cyclades.com/
 S:	Orphan
-F:	drivers/char/cyclades.c
+F:	drivers/tty/cyclades.c
 F:	include/linux/cyclades.h
 
 CYCLADES PC300 DRIVER
@@ -2118,8 +2124,8 @@ L:	Eng.Linux@digi.com
 W:	http://www.digi.com
 S:	Orphan
 F:	Documentation/serial/digiepca.txt
-F:	drivers/char/epca*
-F:	drivers/char/digi*
+F:	drivers/staging/tty/epca*
+F:	drivers/staging/tty/digi*
 
 DIOLAN U2C-12 I2C DRIVER
 M:	Guenter Roeck <guenter.roeck@ericsson.com>
@@ -2796,42 +2802,23 @@ GPIO SUBSYSTEM
 M:	Grant Likely <grant.likely@secretlab.ca>
 S:	Maintained
 T:	git git://git.secretlab.ca/git/linux-2.6.git
-F:	Documentation/gpio/gpio.txt
+F:	Documentation/gpio.txt
 F:	drivers/gpio/
 F:	include/linux/gpio*
 
+GRE DEMULTIPLEXER DRIVER
+M:	Dmitry Kozlov <xeb@mail.ru>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	net/ipv4/gre.c
+F:	include/net/gre.h
+
 GRETH 10/100/1G Ethernet MAC device driver
 M:	Kristoffer Glembo <kristoffer@gaisler.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/greth*
 
-HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
-M:	Frank Seidel <frank@f-seidel.de>
-L:	platform-driver-x86@vger.kernel.org
-W:	http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
-S:	Maintained
-F:	drivers/platform/x86/hdaps.c
-
-HWPOISON MEMORY FAILURE HANDLING
-M:	Andi Kleen <andi@firstfloor.org>
-L:	linux-mm@kvack.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison
-S:	Maintained
-F:	mm/memory-failure.c
-F:	mm/hwpoison-inject.c
-
-HYPERVISOR VIRTUAL CONSOLE DRIVER
-L:	linuxppc-dev@lists.ozlabs.org
-S:	Odd Fixes
-F:	drivers/tty/hvc/
-
-iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
-M:	Peter Jones <pjones@redhat.com>
-M:	Konrad Rzeszutek Wilk <konrad@kernel.org>
-S:	Maintained
-F:	drivers/firmware/iscsi_ibft*
-
 GSPCA FINEPIX SUBDRIVER
 M:	Frank Zago <frank@zago.net>
 L:	linux-media@vger.kernel.org
@@ -2882,6 +2869,26 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
 S:	Maintained
 F:	drivers/media/video/gspca/
 
+HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
+M:	Frank Seidel <frank@f-seidel.de>
+L:	platform-driver-x86@vger.kernel.org
+W:	http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
+S:	Maintained
+F:	drivers/platform/x86/hdaps.c
+
+HWPOISON MEMORY FAILURE HANDLING
+M:	Andi Kleen <andi@firstfloor.org>
+L:	linux-mm@kvack.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison
+S:	Maintained
+F:	mm/memory-failure.c
+F:	mm/hwpoison-inject.c
+
+HYPERVISOR VIRTUAL CONSOLE DRIVER
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Odd Fixes
+F:	drivers/tty/hvc/
+
 HARDWARE MONITORING
 M:	Jean Delvare <khali@linux-fr.org>
 M:	Guenter Roeck <guenter.roeck@ericsson.com>
@@ -2932,8 +2939,8 @@ F:	drivers/block/cciss*
 F:	include/linux/cciss_ioctl.h
 
 HFS FILESYSTEM
-M:	Roman Zippel <zippel@linux-m68k.org>
-S:	Maintained
+L:	linux-fsdevel@vger.kernel.org
+S:	Orphan
 F:	Documentation/filesystems/hfs.txt
 F:	fs/hfs/
 
@@ -3471,6 +3478,12 @@ F:	Documentation/isapnp.txt
 F:	drivers/pnp/isapnp/
 F:	include/linux/isapnp.h
 
+iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
+M:	Peter Jones <pjones@redhat.com>
+M:	Konrad Rzeszutek Wilk <konrad@kernel.org>
+S:	Maintained
+F:	drivers/firmware/iscsi_ibft*
+
 ISCSI
 M:	Mike Christie <michaelc@cs.wisc.edu>
 L:	open-iscsi@googlegroups.com
@@ -3800,7 +3813,7 @@ M:	Rusty Russell <rusty@rustcorp.com.au>
 L:	lguest@lists.ozlabs.org
 W:	http://lguest.ozlabs.org/
 S:	Odd Fixes
-F:	Documentation/lguest/
+F:	Documentation/virtual/lguest/
 F:	arch/x86/lguest/
 F:	drivers/lguest/
 F:	include/linux/lguest*.h
@@ -3987,7 +4000,6 @@ F:	arch/m32r/
 
 M68K ARCHITECTURE
 M:	Geert Uytterhoeven <geert@linux-m68k.org>
-M:	Roman Zippel <zippel@linux-m68k.org>
 L:	linux-m68k@lists.linux-m68k.org
 W:	http://www.linux-m68k.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git
@@ -4077,7 +4089,7 @@ F:	drivers/video/matrox/matroxfb_*
 F:	include/linux/matroxfb.h
 
 MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
-M:	"Hans J. Koch" <hjk@linutronix.de>
+M:	"Hans J. Koch" <hjk@hansjkoch.de>
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 F:	Documentation/hwmon/max6650
@@ -4192,7 +4204,7 @@ MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD
 M:	Jiri Slaby <jirislaby@gmail.com>
 S:	Maintained
 F:	Documentation/serial/moxa-smartio
-F:	drivers/char/mxser.*
+F:	drivers/tty/mxser.*
 
 MSI LAPTOP SUPPORT
 M:	"Lee, Chun-Yi" <jlee@novell.com>
@@ -4234,7 +4246,7 @@ F:	sound/oss/msnd*
 
 MULTITECH MULTIPORT CARD (ISICOM)
 S:	Orphan
-F:	drivers/char/isicom.c
+F:	drivers/tty/isicom.c
 F:	include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
@@ -4983,6 +4995,13 @@ F:	Documentation/pps/
 F:	drivers/pps/
 F:	include/linux/pps*.h
 
+PPTP DRIVER
+M:	Dmitry Kozlov <xeb@mail.ru>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/pptp.c
+W:	http://sourceforge.net/projects/accel-pptp
+
 PREEMPTIBLE KERNEL
 M:	Robert Love <rml@tech9.net>
 L:	kpreempt-tech@lists.sourceforge.net
@@ -5274,14 +5293,14 @@ F:	drivers/memstick/host/r592.*
 RISCOM8 DRIVER
 S:	Orphan
 F:	Documentation/serial/riscom8.txt
-F:	drivers/char/riscom8*
+F:	drivers/staging/tty/riscom8*
 
 ROCKETPORT DRIVER
 P:	Comtrol Corp.
 W:	http://www.comtrol.com
 S:	Maintained
 F:	Documentation/serial/rocket.txt
-F:	drivers/char/rocket*
+F:	drivers/tty/rocket*
 
 ROSE NETWORK LAYER
 M:	Ralf Baechle <ralf@linux-mips.org>
@@ -5391,7 +5410,7 @@ F:	drivers/media/video/*7146*
 F:	include/media/*7146*
 
 SAMSUNG AUDIO (ASoC) DRIVERS
-M:	Jassi Brar <jassi.brar@samsung.com>
+M:	Jassi Brar <jassisinghbrar@gmail.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/samsung
@@ -5412,6 +5431,7 @@ F:	include/linux/timex.h
 F:	kernel/time/clocksource.c
 F:	kernel/time/time*.c
 F:	kernel/time/ntp.c
+F:	drivers/clocksource
 
 TLG2300 VIDEO4LINUX-2 DRIVER
 M:	Huang Shijie <shijie8@gmail.com>
@@ -5592,9 +5612,9 @@ F:	include/linux/ata.h
 F:	include/linux/libata.h
 
 SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
-M:	Jayamohan Kallickal <jayamohank@serverengines.com>
+M:	Jayamohan Kallickal <jayamohan.kallickal@emulex.com>
 L:	linux-scsi@vger.kernel.org
-W:	http://www.serverengines.com
+W:	http://www.emulex.com
 S:	Supported
 F:	drivers/scsi/be2iscsi/
 
@@ -5924,10 +5944,9 @@ F:	arch/arm/mach-spear6xx/spear600.c
 F:	arch/arm/mach-spear6xx/spear600_evb.c
 
 SPECIALIX IO8+ MULTIPORT SERIAL CARD DRIVER
-M:	Roger Wolff <R.E.Wolff@BitWizard.nl>
-S:	Supported
+S:	Orphan
 F:	Documentation/serial/specialix.txt
-F:	drivers/char/specialix*
+F:	drivers/staging/tty/specialix*
 
 SPI SUBSYSTEM
 M:	David Brownell <dbrownell@users.sourceforge.net>
@@ -5972,7 +5991,6 @@ F:	arch/alpha/kernel/srm_env.c
 
 STABLE BRANCH
 M:	Greg Kroah-Hartman <greg@kroah.com>
-M:	Chris Wright <chrisw@sous-sol.org>
 L:	stable@kernel.org
 S:	Maintained
 
@@ -6256,7 +6274,8 @@ M:	Greg Ungerer <gerg@uclinux.org>
 W:	http://www.uclinux.org/
 L:	uclinux-dev@uclinux.org  (subscribers-only)
 S:	Maintained
-F:	arch/m68knommu/
+F:	arch/m68k/*/*_no.*
+F:	arch/m68k/include/asm/*_no.*
 
 UCLINUX FOR RENESAS H8/300 (H8300)
 M:	Yoshinori Sato <ysato@users.sourceforge.jp>
@@ -6620,13 +6639,13 @@ L:	user-mode-linux-devel@lists.sourceforge.net
 L:	user-mode-linux-user@lists.sourceforge.net
 W:	http://user-mode-linux.sourceforge.net
 S:	Maintained
-F:	Documentation/uml/
+F:	Documentation/virtual/uml/
 F:	arch/um/
 F:	fs/hostfs/
 F:	fs/hppfs/
 
 USERSPACE I/O (UIO)
-M:	"Hans J. Koch" <hjk@linutronix.de>
+M:	"Hans J. Koch" <hjk@hansjkoch.de>
 M:	Greg Kroah-Hartman <gregkh@suse.de>
 S:	Maintained
 F:	Documentation/DocBook/uio-howto.tmpl
@@ -6924,6 +6943,18 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.
 S:	Maintained
 F:	drivers/platform/x86
 
+XEN HYPERVISOR INTERFACE
+M:	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	virtualization@lists.linux-foundation.org
+S:	Supported
+F:	arch/x86/xen/
+F:	drivers/*/xen-*front.c
+F:	drivers/xen/
+F:	arch/x86/include/asm/xen/
+F:	include/xen/
+
 XEN NETWORK BACKEND DRIVER
 M:	Ian Campbell <ian.campbell@citrix.com>
 L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
@@ -6945,18 +6976,6 @@ S:	Supported
 F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
 
-XEN HYPERVISOR INTERFACE
-M:	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
-M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
-L:	virtualization@lists.linux-foundation.org
-S:	Supported
-F:	arch/x86/xen/
-F:	drivers/*/xen-*front.c
-F:	drivers/xen/
-F:	arch/x86/include/asm/xen/
-F:	include/xen/
-
 XFS FILESYSTEM
 P:	Silicon Graphics Inc
 M:	Alex Elder <aelder@sgi.com>
@@ -7026,20 +7045,6 @@ M:	"Maciej W. Rozycki" <macro@linux-mips.org>
 S:	Maintained
 F:	drivers/tty/serial/zs.*
 
-GRE DEMULTIPLEXER DRIVER
-M:	Dmitry Kozlov <xeb@mail.ru>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	net/ipv4/gre.c
-F:	include/net/gre.h
-
-PPTP DRIVER
-M:	Dmitry Kozlov <xeb@mail.ru>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/pptp.c
-W:	http://sourceforge.net/projects/accel-pptp
-
 THE REST
 M:	Linus Torvalds <torvalds@linux-foundation.org>
 L:	linux-kernel@vger.kernel.org
diff --git a/Makefile b/Makefile
index 8392b64079df..a0344a81a893 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 39
-EXTRAVERSION = -rc2
+EXTRAVERSION =
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
@@ -1268,6 +1268,7 @@ help:
 	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
 	@echo  '  make W=1   [targets] Enable extra gcc checks'
+	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  ''
 	@echo  'Execute "make" or "make all" to build all targets marked with [*] '
 	@echo  'For further info see the ./README file'
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937bf5a77..b1834166922d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,14 @@
 #define __NR_fanotify_init		494
 #define __NR_fanotify_mark		495
 #define __NR_prlimit64			496
+#define __NR_name_to_handle_at		497
+#define __NR_open_by_handle_at		498
+#define __NR_clock_adjtime		499
+#define __NR_syncfs			500
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			497
+#define NR_SYSCALLS			501
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index 9bb7b858ed23..7a6d908bb865 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -4,7 +4,7 @@
 
 extra-y		:= head.o vmlinux.lds
 asflags-y	:= $(KBUILD_CFLAGS)
-ccflags-y	:= -Werror -Wno-sign-compare
+ccflags-y	:= -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c
index 381fec0af52e..da7bcc372f16 100644
--- a/arch/alpha/kernel/core_mcpcia.c
+++ b/arch/alpha/kernel/core_mcpcia.c
@@ -88,7 +88,7 @@ conf_read(unsigned long addr, unsigned char type1,
 {
 	unsigned long flags;
 	unsigned long mid = MCPCIA_HOSE2MID(hose->index);
-	unsigned int stat0, value, temp, cpu;
+	unsigned int stat0, value, cpu;
 
 	cpu = smp_processor_id();
 
@@ -101,7 +101,7 @@ conf_read(unsigned long addr, unsigned char type1,
 	stat0 = *(vuip)MCPCIA_CAP_ERR(mid);
 	*(vuip)MCPCIA_CAP_ERR(mid) = stat0;
 	mb();
-	temp = *(vuip)MCPCIA_CAP_ERR(mid);
+	*(vuip)MCPCIA_CAP_ERR(mid);
 	DBG_CFG(("conf_read: MCPCIA_CAP_ERR(%d) was 0x%x\n", mid, stat0));
 
 	mb();
@@ -136,7 +136,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1,
 {
 	unsigned long flags;
 	unsigned long mid = MCPCIA_HOSE2MID(hose->index);
-	unsigned int stat0, temp, cpu;
+	unsigned int stat0, cpu;
 
 	cpu = smp_processor_id();
 
@@ -145,7 +145,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1,
 	/* Reset status register to avoid losing errors.  */
 	stat0 = *(vuip)MCPCIA_CAP_ERR(mid);
 	*(vuip)MCPCIA_CAP_ERR(mid) = stat0; mb();
-	temp = *(vuip)MCPCIA_CAP_ERR(mid);
+	*(vuip)MCPCIA_CAP_ERR(mid);
 	DBG_CFG(("conf_write: MCPCIA CAP_ERR(%d) was 0x%x\n", mid, stat0));
 
 	draina();
@@ -157,7 +157,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1,
 	*((vuip)addr) = value;
 	mb();
 	mb();  /* magic */
-	temp = *(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */
+	*(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */
 	mcheck_expected(cpu) = 0;
 	mb();
 
@@ -572,12 +572,10 @@ mcpcia_print_system_area(unsigned long la_ptr)
 void
 mcpcia_machine_check(unsigned long vector, unsigned long la_ptr)
 {
-	struct el_common *mchk_header;
 	struct el_MCPCIA_uncorrected_frame_mcheck *mchk_logout;
 	unsigned int cpu = smp_processor_id();
 	int expected;
 
-	mchk_header = (struct el_common *)la_ptr;
 	mchk_logout = (struct el_MCPCIA_uncorrected_frame_mcheck *)la_ptr;
 	expected = mcheck_expected(cpu);
 
diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c
index c3b3781a03de..14b26c466c89 100644
--- a/arch/alpha/kernel/err_titan.c
+++ b/arch/alpha/kernel/err_titan.c
@@ -533,8 +533,6 @@ static struct el_subpacket_annotation el_titan_annotations[] = {
 static struct el_subpacket *
 el_process_regatta_subpacket(struct el_subpacket *header)
 {
-	int status;
-
 	if (header->class != EL_CLASS__REGATTA_FAMILY) {
 		printk("%s  ** Unexpected header CLASS %d TYPE %d, aborting\n",
 		       err_print_prefix,
@@ -551,7 +549,7 @@ el_process_regatta_subpacket(struct el_subpacket *header)
 		printk("%s  ** Occurred on CPU %d:\n", 
 		       err_print_prefix,
 		       (int)header->by_type.regatta_frame.cpuid);
-		status = privateer_process_logout_frame((struct el_common *)
+		privateer_process_logout_frame((struct el_common *)
 			header->by_type.regatta_frame.data_start, 1);
 		break;
 	default:
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 1479dc6ebd97..51b7fbd9e4c1 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -228,7 +228,7 @@ struct irqaction timer_irqaction = {
 void __init
 init_rtc_irq(void)
 {
-	irq_set_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
+	irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
 				      handle_simple_irq, "RTC");
 	setup_irq(RTC_IRQ, &timer_irqaction);
 }
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index d2634e4476b4..edbddcbd5bc6 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -1404,8 +1404,6 @@ determine_cpu_caches (unsigned int cpu_type)
 	case PCA56_CPU:
 	case PCA57_CPU:
 	  {
-		unsigned long cbox_config, size;
-
 		if (cpu_type == PCA56_CPU) {
 			L1I = CSHAPE(16*1024, 6, 1);
 			L1D = CSHAPE(8*1024, 5, 1);
@@ -1415,10 +1413,12 @@ determine_cpu_caches (unsigned int cpu_type)
 		}
 		L3 = -1;
 
+#if 0
+		unsigned long cbox_config, size;
+
 		cbox_config = *(vulp) phys_to_virt (0xfffff00008UL);
 		size = 512*1024 * (1 << ((cbox_config >> 12) & 3));
 
-#if 0
 		L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1);
 #else
 		L2 = external_cache_probe(512*1024, 6);
diff --git a/arch/alpha/kernel/smc37c93x.c b/arch/alpha/kernel/smc37c93x.c
index 3e6a2893af9f..6886b834f487 100644
--- a/arch/alpha/kernel/smc37c93x.c
+++ b/arch/alpha/kernel/smc37c93x.c
@@ -79,7 +79,6 @@
 static unsigned long __init SMCConfigState(unsigned long baseAddr)
 {
 	unsigned char devId;
-	unsigned char devRev;
 
 	unsigned long configPort;
 	unsigned long indexPort;
@@ -100,7 +99,7 @@ static unsigned long __init SMCConfigState(unsigned long baseAddr)
 		devId = inb(dataPort);
 		if (devId == VALID_DEVICE_ID) {
 			outb(DEVICE_REV, indexPort);
-			devRev = inb(dataPort);
+			/* unsigned char devRev = */ inb(dataPort);
 			break;
 		}
 		else
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 42aa078a5e4d..5a621c6d22ab 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
 
 		switch (which) {
 		case IPI_RESCHEDULE:
-			/* Reschedule callback.  Everything to be done
-			   is done by the interrupt return path.  */
+			scheduler_ipi();
 			break;
 
 		case IPI_CALL_FUNC:
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index d3cb28bb8eb0..d92cdc715c65 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -156,7 +156,6 @@ static void __init
 wildfire_init_irq_per_pca(int qbbno, int pcano)
 {
 	int i, irq_bias;
-	unsigned long io_bias;
 	static struct irqaction isa_enable = {
 		.handler	= no_action,
 		.name		= "isa_enable",
@@ -165,10 +164,12 @@ wildfire_init_irq_per_pca(int qbbno, int pcano)
 	irq_bias = qbbno * (WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA)
 		 + pcano * WILDFIRE_IRQ_PER_PCA;
 
+#if 0
+	unsigned long io_bias;
+
 	/* Only need the following for first PCI bus per PCA. */
 	io_bias = WILDFIRE_IO(qbbno, pcano<<1) - WILDFIRE_IO_BIAS;
 
-#if 0
 	outb(0, DMA1_RESET_REG + io_bias);
 	outb(0, DMA2_RESET_REG + io_bias);
 	outb(DMA_MODE_CASCADE, DMA2_MODE_REG + io_bias);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9db16f..15f999d41c75 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -498,23 +498,27 @@ sys_call_table:
 	.quad sys_ni_syscall			/* sys_timerfd */
 	.quad sys_eventfd
 	.quad sys_recvmmsg
-	.quad sys_fallocate				/* 480 */
+	.quad sys_fallocate			/* 480 */
 	.quad sys_timerfd_create
 	.quad sys_timerfd_settime
 	.quad sys_timerfd_gettime
 	.quad sys_signalfd4
-	.quad sys_eventfd2				/* 485 */
+	.quad sys_eventfd2			/* 485 */
 	.quad sys_epoll_create1
 	.quad sys_dup3
 	.quad sys_pipe2
 	.quad sys_inotify_init1
-	.quad sys_preadv				/* 490 */
+	.quad sys_preadv			/* 490 */
 	.quad sys_pwritev
 	.quad sys_rt_tgsigqueueinfo
 	.quad sys_perf_event_open
 	.quad sys_fanotify_init
-	.quad sys_fanotify_mark				/* 495 */
+	.quad sys_fanotify_mark			/* 495 */
 	.quad sys_prlimit64
+	.quad sys_name_to_handle_at
+	.quad sys_open_by_handle_at
+	.quad sys_clock_adjtime
+	.quad sys_syncfs			/* 500 */
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index a58e84f1a63b..818e74ed45dc 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -153,6 +153,7 @@ void read_persistent_clock(struct timespec *ts)
 		year += 100;
 
 	ts->tv_sec = mktime(year, mon, day, hour, min, sec);
+	ts->tv_nsec = 0;
 }
 
 
@@ -374,8 +375,7 @@ static struct clocksource clocksource_rpcc = {
 
 static inline void register_rpcc_clocksource(long cycle_freq)
 {
-	clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
-	clocksource_register(&clocksource_rpcc);
+	clocksource_register_hz(&clocksource_rpcc, cycle_freq);
 }
 #else /* !CONFIG_SMP */
 static inline void register_rpcc_clocksource(long cycle_freq)
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 433be2a24f31..3d890a98a08b 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
 	__init_end = .;
 	/* Freed after init ends here */
 
+	_sdata = .;	/* Start of rw data section */
 	_data = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fdc9d4dbf85b..377a7a595b08 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1540,7 +1540,6 @@ config HIGHMEM
 config HIGHPTE
 	bool "Allocate 2nd-level pagetables from highmem"
 	depends on HIGHMEM
-	depends on !OUTER_CACHE
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
@@ -2012,6 +2011,8 @@ source "kernel/power/Kconfig"
 
 config ARCH_SUSPEND_POSSIBLE
 	depends on !ARCH_S5P64X0 && !ARCH_S5P6442
+	depends on CPU_ARM920T || CPU_ARM926T || CPU_SA1100 || \
+		CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE
 	def_bool y
 
 endmenu
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 494224a9b459..03d01d783e3b 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -63,17 +63,6 @@ config DEBUG_USER
 	      8 - SIGSEGV faults
 	     16 - SIGBUS faults
 
-config DEBUG_ERRORS
-	bool "Verbose kernel error messages"
-	depends on DEBUG_KERNEL
-	help
-	  This option controls verbose debugging information which can be
-	  printed when the kernel detects an internal error. This debugging
-	  information is useful to kernel hackers when tracking down problems,
-	  but mostly meaningless to other people. It's safe to say Y unless
-	  you are concerned with the code size or don't want to see these
-	  messages.
-
 config DEBUG_STACK_USAGE
 	bool "Enable stack utilization instrumentation"
 	depends on DEBUG_KERNEL
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 8ebbb511c783..0c6852d93506 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -74,7 +74,7 @@ ZTEXTADDR	:= $(CONFIG_ZBOOT_ROM_TEXT)
 ZBSSADDR	:= $(CONFIG_ZBOOT_ROM_BSS)
 else
 ZTEXTADDR	:= 0
-ZBSSADDR	:= ALIGN(4)
+ZBSSADDR	:= ALIGN(8)
 endif
 
 SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index adf583cd0c35..49f5b2eaaa87 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -179,15 +179,14 @@ not_angel:
 		bl	cache_on
 
 restart:	adr	r0, LC0
-		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
-		ldr	sp, [r0, #32]
+		ldmia	r0, {r1, r2, r3, r6, r9, r11, r12}
+		ldr	sp, [r0, #28]
 
 		/*
 		 * We might be running at a different address.  We need
 		 * to fix up various pointers.
 		 */
 		sub	r0, r0, r1		@ calculate the delta offset
-		add	r5, r5, r0		@ _start
 		add	r6, r6, r0		@ _edata
 
 #ifndef CONFIG_ZBOOT_ROM
@@ -206,31 +205,40 @@ restart:	adr	r0, LC0
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
- *   r5  = start of this image
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
- *   r4 >= r10 -> OK
- *   r4 + image length <= r5 -> OK
+ *   r4 - 16k page directory >= r10 -> OK
+ *   r4 + image length <= current position (pc) -> OK
  */
+		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-		cmp	r10, r5
+   ARM(		cmp	r10, pc		)
+ THUMB(		mov	lr, pc		)
+ THUMB(		cmp	r10, lr		)
 		bls	wont_overwrite
 
 /*
  * Relocate ourselves past the end of the decompressed kernel.
- *   r5  = start of this image
  *   r6  = _edata
  *   r10 = end of the decompressed kernel
  * Because we always copy ahead, we need to do it from the end and go
  * backward in case the source and destination overlap.
  */
-		/* Round up to next 256-byte boundary. */
-		add	r10, r10, #256
+		/*
+		 * Bump to the next 256-byte boundary with the size of
+		 * the relocation code added. This avoids overwriting
+		 * ourselves when the offset is small.
+		 */
+		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 		bic	r10, r10, #255
 
+		/* Get start of code we want to copy and align it down. */
+		adr	r5, restart
+		bic	r5, r5, #31
+
 		sub	r9, r6, r5		@ size to copy
 		add	r9, r9, #31		@ rounded up to a multiple
 		bic	r9, r9, #31		@ ... of 32 bytes
@@ -245,6 +253,11 @@ restart:	adr	r0, LC0
 		/* Preserve offset to relocated code. */
 		sub	r6, r9, r6
 
+#ifndef CONFIG_ZBOOT_ROM
+		/* cache_clean_flush may use the stack, so relocate it */
+		add	sp, sp, r6
+#endif
+
 		bl	cache_clean_flush
 
 		adr	r0, BSYM(restart)
@@ -333,7 +346,6 @@ not_relocated:	mov	r0, #0
 LC0:		.word	LC0			@ r1
 		.word	__bss_start		@ r2
 		.word	_end			@ r3
-		.word	_start			@ r5
 		.word	_edata			@ r6
 		.word	_image_size		@ r9
 		.word	_got_start		@ r11
@@ -1062,6 +1074,7 @@ memdump:	mov	r12, r0
 #endif
 
 		.ltorg
+reloc_code_end:
 
 		.align
 		.section ".stack", "aw", %nobits
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 5309909d7282..ea80abe78844 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -54,6 +54,7 @@ SECTIONS
   .bss			: { *(.bss) }
   _end = .;
 
+  . = ALIGN(8);		/* the stack must be 64-bit aligned */
   .stack		: { *(.stack) }
 
   .stab 0		: { *(.stab) }
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index e7521bca2c35..6ea9b6f3607a 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
-obj-$(CONFIG_COMMON_CLKDEV)	+= clkdev.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index 113085a77123..7aa4262ada7a 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -22,17 +22,16 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
 #include <asm/mach/irq.h>
 #include <asm/hardware/vic.h>
 
-#if defined(CONFIG_PM)
+#ifdef CONFIG_PM
 /**
  * struct vic_device - VIC PM device
- * @sysdev: The system device which is registered.
  * @irq: The IRQ number for the base of the VIC.
  * @base: The register base for the VIC.
  * @resume_sources: A bitmask of interrupts for resume.
@@ -43,8 +42,6 @@
  * @protect: Save for VIC_PROTECT.
  */
 struct vic_device {
-	struct sys_device sysdev;
-
 	void __iomem	*base;
 	int		irq;
 	u32		resume_sources;
@@ -59,11 +56,6 @@ struct vic_device {
 static struct vic_device vic_devices[CONFIG_ARM_VIC_NR];
 
 static int vic_id;
-
-static inline struct vic_device *to_vic(struct sys_device *sys)
-{
-	return container_of(sys, struct vic_device, sysdev);
-}
 #endif /* CONFIG_PM */
 
 /**
@@ -85,10 +77,9 @@ static void vic_init2(void __iomem *base)
 	writel(32, base + VIC_PL190_DEF_VECT_ADDR);
 }
 
-#if defined(CONFIG_PM)
-static int vic_class_resume(struct sys_device *dev)
+#ifdef CONFIG_PM
+static void resume_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base);
@@ -107,13 +98,18 @@ static int vic_class_resume(struct sys_device *dev)
 
 	writel(vic->soft_int, base + VIC_INT_SOFT);
 	writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR);
+}
 
-	return 0;
+static void vic_resume(void)
+{
+	int id;
+
+	for (id = vic_id - 1; id >= 0; id--)
+		resume_one_vic(vic_devices + id);
 }
 
-static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base);
@@ -128,14 +124,21 @@ static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
 
 	writel(vic->resume_irqs, base + VIC_INT_ENABLE);
 	writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR);
+}
+
+static int vic_suspend(void)
+{
+	int id;
+
+	for (id = 0; id < vic_id; id++)
+		suspend_one_vic(vic_devices + id);
 
 	return 0;
 }
 
-struct sysdev_class vic_class = {
-	.name		= "vic",
-	.suspend	= vic_class_suspend,
-	.resume		= vic_class_resume,
+struct syscore_ops vic_syscore_ops = {
+	.suspend	= vic_suspend,
+	.resume		= vic_resume,
 };
 
 /**
@@ -147,30 +150,8 @@ struct sysdev_class vic_class = {
 */
 static int __init vic_pm_init(void)
 {
-	struct vic_device *dev = vic_devices;
-	int err;
-	int id;
-
-	if (vic_id == 0)
-		return 0;
-
-	err = sysdev_class_register(&vic_class);
-	if (err) {
-		printk(KERN_ERR "%s: cannot register class\n", __func__);
-		return err;
-	}
-
-	for (id = 0; id < vic_id; id++, dev++) {
-		dev->sysdev.id = id;
-		dev->sysdev.cls = &vic_class;
-
-		err = sysdev_register(&dev->sysdev);
-		if (err) {
-			printk(KERN_ERR "%s: failed to register device\n",
-			       __func__);
-			return err;
-		}
-	}
+	if (vic_id > 0)
+		register_syscore_ops(&vic_syscore_ops);
 
 	return 0;
 }
diff --git a/arch/arm/configs/at91x40_defconfig b/arch/arm/configs/at91x40_defconfig
new file mode 100644
index 000000000000..c55e9212fcbb
--- /dev/null
+++ b/arch/arm/configs/at91x40_defconfig
@@ -0,0 +1,48 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EMBEDDED=y
+# CONFIG_HOTPLUG is not set
+# CONFIG_ELF_CORE is not set
+# CONFIG_FUTEX is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
+CONFIG_ARCH_AT91=y
+CONFIG_ARCH_AT91X40=y
+CONFIG_MACH_AT91EB01=y
+CONFIG_AT91_EARLY_USART0=y
+CONFIG_CPU_ARM7TDMI=y
+CONFIG_SET_MEM_PARAM=y
+CONFIG_DRAM_BASE=0x01000000
+CONFIG_DRAM_SIZE=0x00400000
+CONFIG_FLASH_MEM_BASE=0x01400000
+CONFIG_PROCESSOR_ID=0x14000040
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_BINFMT_FLAT=y
+# CONFIG_SUSPEND is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_PARTITIONS=y
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_RAM=y
+CONFIG_MTD_ROM=y
+CONFIG_BLK_DEV_RAM=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index ed5bc9e05a4e..cd4458f64171 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_CPUTYPE_H
 
 #include <linux/stringify.h>
+#include <linux/kernel.h>
 
 #define CPUID_ID	0
 #define CPUID_CACHETYPE	1
diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h
new file mode 100644
index 000000000000..70656b69d5ce
--- /dev/null
+++ b/arch/arm/include/asm/i8253.h
@@ -0,0 +1,15 @@
+#ifndef __ASMARM_I8253_H
+#define __ASMARM_I8253_H
+
+/* i8253A PIT registers */
+#define PIT_MODE	0x43
+#define PIT_CH0		0x40
+
+#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
+
+extern raw_spinlock_t i8253_lock;
+
+#define outb_pit	outb_p
+#define inb_pit		inb_p
+
+#endif
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index bb8a19bd5822..e46bdd0097eb 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -39,10 +39,13 @@ typedef u32 kprobe_opcode_t;
 struct kprobe;
 typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
 
+typedef unsigned long (kprobe_check_cc)(unsigned long);
+
 /* Architecture specific copy of original instruction. */
 struct arch_specific_insn {
 	kprobe_opcode_t		*insn;
 	kprobe_insn_handler_t	*insn_handler;
+	kprobe_check_cc		*insn_check_cc;
 };
 
 struct prev_kprobe {
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 883f6be5117a..d5adaae5ee2c 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -34,7 +34,6 @@
  *   timer interrupt which may be pending.
  */
 struct sys_timer {
-	struct sys_device	dev;
 	void			(*init)(void);
 	void			(*suspend)(void);
 	void			(*resume)(void);
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 885be097769d..832888d0c20c 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -159,7 +159,7 @@ extern unsigned int user_debug;
 #include <mach/barriers.h>
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
 #else
 #include <asm/memory.h>
diff --git a/arch/arm/include/asm/thread_notify.h b/arch/arm/include/asm/thread_notify.h
index c4391ba20350..1dc980675894 100644
--- a/arch/arm/include/asm/thread_notify.h
+++ b/arch/arm/include/asm/thread_notify.h
@@ -43,6 +43,7 @@ static inline void thread_notify(unsigned long rc, struct thread_info *thread)
 #define THREAD_NOTIFY_FLUSH	0
 #define THREAD_NOTIFY_EXIT	1
 #define THREAD_NOTIFY_SWITCH	2
+#define THREAD_NOTIFY_COPY	3
 
 #endif
 #endif
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index c891eb76c0e3..87dbe3e21970 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -396,6 +396,10 @@
 #define __NR_fanotify_init		(__NR_SYSCALL_BASE+367)
 #define __NR_fanotify_mark		(__NR_SYSCALL_BASE+368)
 #define __NR_prlimit64			(__NR_SYSCALL_BASE+369)
+#define __NR_name_to_handle_at		(__NR_SYSCALL_BASE+370)
+#define __NR_open_by_handle_at		(__NR_SYSCALL_BASE+371)
+#define __NR_clock_adjtime		(__NR_SYSCALL_BASE+372)
+#define __NR_syncfs			(__NR_SYSCALL_BASE+373)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 74554f1742d7..8d95446150a3 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_PM)		+= sleep.o
+obj-$(CONFIG_PM_SLEEP)		+= sleep.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 5c26eccef998..7fbf28c35bb2 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -379,6 +379,10 @@
 		CALL(sys_fanotify_init)
 		CALL(sys_fanotify_mark)
 		CALL(sys_prlimit64)
+/* 370 */	CALL(sys_name_to_handle_at)
+		CALL(sys_open_by_handle_at)
+		CALL(sys_clock_adjtime)
+		CALL(sys_syncfs)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
index d4a0da1e48f4..9b05c6a0dcea 100644
--- a/arch/arm/kernel/elf.c
+++ b/arch/arm/kernel/elf.c
@@ -40,15 +40,22 @@ EXPORT_SYMBOL(elf_check_arch);
 void elf_set_personality(const struct elf32_hdr *x)
 {
 	unsigned int eflags = x->e_flags;
-	unsigned int personality = PER_LINUX_32BIT;
+	unsigned int personality = current->personality & ~PER_MASK;
+
+	/*
+	 * We only support Linux ELF executables, so always set the
+	 * personality to LINUX.
+	 */
+	personality |= PER_LINUX;
 
 	/*
 	 * APCS-26 is only valid for OABI executables
 	 */
-	if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) {
-		if (eflags & EF_ARM_APCS_26)
-			personality = PER_LINUX;
-	}
+	if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN &&
+	    (eflags & EF_ARM_APCS_26))
+		personality &= ~ADDR_LIMIT_32BIT;
+	else
+		personality |= ADDR_LIMIT_32BIT;
 
 	set_personality(personality);
 
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 8dbc126f7152..87acc25d7a3e 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -868,6 +868,13 @@ static void reset_ctrl_regs(void *info)
 		 */
 		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
 		isb();
+
+		/*
+		 * Clear any configured vector-catch events before
+		 * enabling monitor mode.
+		 */
+		asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+		isb();
 	}
 
 	if (enable_monitor_mode())
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 23891317dc4b..15eeff6aea0e 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -34,9 +34,6 @@
  *
  *   *) If the PC is written to by the instruction, the
  *      instruction must be fully simulated in software.
- *      If it is a conditional instruction, the handler
- *      will use insn[0] to copy its condition code to
- *	set r0 to 1 and insn[1] to "mov pc, lr" to return.
  *
  *   *) Otherwise, a modified form of the instruction is
  *      directly executed.  Its handler calls the
@@ -68,13 +65,17 @@
 
 #define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
 
+#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos))
+
+/*
+ * Test if a load/store instruction writes back to the address register,
+ * i.e. if P (bit 24) == 0 or W (bit 21) == 1.
+ */
+#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
+
 #define PSR_fs	(PSR_f|PSR_s)
 
 #define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
-#define SET_R0_TRUE_INSTRUCTION		0xe3a00001	/* mov	r0, #1 */
-
-#define	truecc_insn(insn)	(((insn) & 0xf0000000) | \
-				 (SET_R0_TRUE_INSTRUCTION & 0x0fffffff))
 
 typedef long (insn_0arg_fn_t)(void);
 typedef long (insn_1arg_fn_t)(long);
@@ -419,14 +420,10 @@ insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr,
 
 static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	long iaddr = (long)p->addr;
 	int disp  = branch_displacement(insn);
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 24))
 		regs->ARM_lr = iaddr + 4;
 
@@ -446,14 +443,10 @@ static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rm = insn & 0xf;
 	long rmv = regs->uregs[rm];
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 5))
 		regs->ARM_lr = (long)p->addr + 4;
 
@@ -463,9 +456,16 @@ static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 		regs->ARM_cpsr |= PSR_T_BIT;
 }
 
+static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
 static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rn = (insn >> 16) & 0xf;
 	int lbit = insn & (1 << 20);
@@ -476,9 +476,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 	int reg_bit_vector;
 	int reg_count;
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	reg_count = 0;
 	reg_bit_vector = insn & 0xffff;
 	while (reg_bit_vector) {
@@ -510,11 +507,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	regs->ARM_pc = (long)p->addr + str_pc_offset;
 	simulate_ldm1stm1(p, regs);
 	regs->ARM_pc = (long)p->addr + 4;
@@ -525,24 +517,16 @@ static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[12] = regs->uregs[13];
 }
 
-static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-
-	/* Save Rn in case of writeback. */
-	regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;  /* rm may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
 
 	/* Not following the C calling convention here, so need asm(). */
 	__asm__ __volatile__ (
@@ -554,29 +538,36 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 		"str	r0, %[rn]	\n\t"	/* in case of writeback */
 		"str	r2, %[rd0]	\n\t"
 		"str	r3, %[rd1]	\n\t"
-		: [rn]  "+m" (regs->uregs[rn]),
+		: [rn]  "+m" (rnv),
 		  [rd0] "=m" (regs->uregs[rd]),
 		  [rd1] "=m" (regs->uregs[rd+1])
-		: [rm]   "m" (regs->uregs[rm]),
+		: [rm]   "m" (rmv),
 		  [cpsr] "r" (regs->ARM_cpsr),
 		  [i_fn] "r" (i_fn)
 		: "r0", "r1", "r2", "r3", "lr", "cc"
 	);
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
 }
 
 static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm  = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rmv = regs->uregs[rm];  /* rm/rmv may be invalid, don't care. */
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	/* rm/rmv may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv_wb;
 
-	regs->uregs[rn] = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
+	rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
 					       regs->uregs[rd+1],
 					       regs->ARM_cpsr, i_fn);
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv_wb;
 }
 
 static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs)
@@ -630,31 +621,6 @@ static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs)
 		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
 }
 
-static void __kprobes emulate_mrrc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_llret_0arg_fn_t *i_fn = (insn_llret_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	union reg_pair fnr;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-
-	fnr.dr = insnslot_llret_0arg_rflags(regs->ARM_cpsr, i_fn);
-	regs->uregs[rn] = fnr.r0;
-	regs->uregs[rd] = fnr.r1;
-}
-
-static void __kprobes emulate_mcrr(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-	long rdv = regs->uregs[rd];
-
-	insnslot_2arg_rflags(rnv, rdv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -688,32 +654,32 @@ static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
 	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rd12(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-
-	regs->uregs[rd] = insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_ird12(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes
+emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
-	int ird = (insn >> 12) & 0xf;
+	int rd = (insn >> 12) & 0xf;
+	long rdv = regs->uregs[rd];
 
-	insnslot_1arg_rflags(regs->uregs[ird], regs->ARM_cpsr, i_fn);
+	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes
+emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
+	int rd = (insn >> 12) & 0xf;
+	int rn = insn & 0xf;
+	long rdv = regs->uregs[rd];
 	long rnv = regs->uregs[rn];
 
-	insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
+	regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn);
 }
 
 static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
@@ -819,6 +785,17 @@ emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
+emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
 emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
@@ -854,14 +831,34 @@ emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs)
 		insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
 }
 
+static void __kprobes
+emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rn = (insn >> 16) & 0xf;
+	int rs = (insn >> 8) & 0xf;	/* rs/rsv may be invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
+}
+
 static enum kprobe_insn __kprobes
 prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	int ibit = (insn & (1 << 26)) ? 25 : 22;
+	int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25))
+					 : (~insn & (1 << 22));
+
+	if (is_writeback(insn) && is_r15(insn, 16))
+		return INSN_REJECTED;	/* Writeback to PC */
 
 	insn &= 0xfff00fff;
 	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	if (insn & (1 << ibit)) {
+	if (not_imm) {
 		insn &= ~0xf;
 		insn |= 2;	/* Rm = r2 */
 	}
@@ -871,20 +868,40 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static enum kprobe_insn __kprobes
-prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0fff;	/* Rd = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12rm0;
+	asi->insn_handler = emulate_rd12_modify;
 	return INSN_GOOD;
 }
 
 static enum kprobe_insn __kprobes
-prep_emulate_rd12(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+prep_emulate_rd12rn0_modify(kprobe_opcode_t insn,
+			    struct arch_specific_insn *asi)
 {
-	insn &= 0xffff0fff;	/* Rd = r0 */
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0 */
+	insn |= 0x00000001;	/* Rn = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rn0_modify;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12;
+	asi->insn_handler = emulate_rd12rm0;
 	return INSN_GOOD;
 }
 
@@ -892,6 +909,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
 				struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000001;	/* Rm = r1 */
 	asi->insn[0] = insn;
@@ -903,6 +923,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn,
 			       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff0f0f0;	/* Rd = r0, Rs = r0 */
 	insn |= 0x00000001;	/* Rm = r1          */
 	asi->insn[0] = insn;
@@ -914,6 +937,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn,
 				   struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff000f0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000102;	/* Rs = r1, Rm = r2 */
 	asi->insn[0] = insn;
@@ -925,6 +951,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 				       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16) || is_r15(insn, 12))
+		return INSN_REJECTED;	/* RdHi or RdLo is PC */
+
 	insn &= 0xfff000f0;	/* RdHi = r0, RdLo = r1 */
 	insn |= 0x00001203;	/* Rs = r2, Rm = r3 */
 	asi->insn[0] = insn;
@@ -945,20 +974,13 @@ prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 static enum kprobe_insn __kprobes
 space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	/* CPS mmod == 1 : 1111 0001 0000 xx10 xxxx xxxx xx0x xxxx */
-	/* RFE           : 1111 100x x0x1 xxxx xxxx 1010 xxxx xxxx */
-	/* SRS           : 1111 100x x1x0 1101 xxxx 0101 xxxx xxxx */
-	if ((insn & 0xfff30020) == 0xf1020000 ||
-	    (insn & 0xfe500f00) == 0xf8100a00 ||
-	    (insn & 0xfe5f0f00) == 0xf84d0500)
-		return INSN_REJECTED;
-
-	/* PLD : 1111 01x1 x101 xxxx xxxx xxxx xxxx xxxx : */
-	if ((insn & 0xfd700000) == 0xf4500000) {
-		insn &= 0xfff0ffff;	/* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_rn16;
-		return INSN_GOOD;
+	/* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDI        : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDW        : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLD         : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */
+	if ((insn & 0xfe300000) == 0xf4100000) {
+		asi->insn_handler = emulate_nop;
+		return INSN_GOOD_NO_SLOT;
 	}
 
 	/* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */
@@ -967,41 +989,22 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return INSN_GOOD_NO_SLOT;
 	}
 
-	/* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-	/* CDP2   : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0xffff00f0) == 0xf1010000 ||
-	    (insn & 0xff000010) == 0xfe000000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
+	/* CPS   : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
+	/* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
 
+	/* SRS   : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE   : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+
+	/* Coprocessor instructions... */
 	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
 	/* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-	if ((insn & 0xffe00000) == 0xfc400000) {
-		insn &= 0xfff00fff;	/* Rn = r0 */
-		insn |= 0x00001000;	/* Rd = r1 */
-		asi->insn[0] = insn;
-		asi->insn_handler =
-			(insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-		return INSN_GOOD;
-	}
+	/* LDC2  : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC2  : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP2  : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR2  : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC2  : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
 
-	/* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfe000000) == 0xfc000000) {
-		insn &= 0xfff0ffff;      /* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_ldcstc;
-		return INSN_GOOD;
-	}
-
-	/* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0]      = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1010,19 +1013,18 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */
 	if ((insn & 0x0f900010) == 0x01000000) {
 
-		/* BXJ  : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-		/* MSR  : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01200020 ||
-		    (insn & 0x0fb000f0) == 0x01200000)
-			return INSN_REJECTED;
-
-		/* MRS : cccc 0001 0x00 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0fb00010) == 0x01000000)
-			return prep_emulate_rd12(insn, asi);
+		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
+		if ((insn & 0x0ff000f0) == 0x01000000) {
+			if (is_r15(insn, 12))
+				return INSN_REJECTED;	/* Rd is PC */
+			asi->insn_handler = simulate_mrs;
+			return INSN_GOOD_NO_SLOT;
+		}
 
 		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
 		if ((insn & 0x0ff00090) == 0x01400080)
-			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 
 		/* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
 		/* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
@@ -1031,24 +1033,29 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
 
 		/* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */
-		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 0x00 xxxx : Q */
-		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */
+		if ((insn & 0x0ff00090) == 0x01000080 ||
+		    (insn & 0x0ff000b0) == 0x01200080)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+
+		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
 
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */
 	else if ((insn & 0x0f900090) == 0x01000010) {
 
-		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-		if ((insn & 0xfff000f0) == 0xe1200070)
-			return INSN_REJECTED;
-
 		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
 		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
 		if ((insn & 0x0ff000d0) == 0x01200010) {
-			asi->insn[0] = truecc_insn(insn);
+			if ((insn & 0x0ff000ff) == 0x0120003f)
+				return INSN_REJECTED; /* BLX pc */
 			asi->insn_handler = simulate_blx2bx;
-			return INSN_GOOD;
+			return INSN_GOOD_NO_SLOT;
 		}
 
 		/* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
@@ -1059,17 +1066,27 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* QSUB    : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDADD   : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDSUB   : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f9000f0) == 0x01000050)
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
+		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+		/* SMC  : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
-	else if ((insn & 0x0f000090) == 0x00000090) {
+	else if ((insn & 0x0f0000f0) == 0x00000090) {
 
 		/* MUL    : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MULS   : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* MLA    : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MLAS   : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMAAL  : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MLS    : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULL  : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMLAL  : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx :   */
@@ -1078,13 +1095,15 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
 		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
-		if ((insn & 0x0fe000f0) == 0x00000090) {
-		       return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		} else if  ((insn & 0x0fe000f0) == 0x00200090) {
-		       return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-		} else {
-		       return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
-		}
+		if ((insn & 0x00d00000) == 0x00500000)
+			return INSN_REJECTED;
+		else if ((insn & 0x00e00000) == 0x00000000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else if ((insn & 0x00a00000) == 0x00200000)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 	}
 
 	/* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */
@@ -1092,23 +1111,45 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* SWP   : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */
 		/* SWPB  : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */
-		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
-		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
+		/* ???   : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */
 		/* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */
 		/* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */
+
+		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
+		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
 		/* LDRH  : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */
 		/* STRH  : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */
 		/* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */
 		/* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */
-		if ((insn & 0x0fb000f0) == 0x01000090) {
-			/* SWP/SWPB */
-			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f0000f0) == 0x01000090) {
+			if ((insn & 0x0fb000f0) == 0x01000090) {
+				/* SWP/SWPB */
+				return prep_emulate_rd12rn16rm0_wflags(insn,
+									asi);
+			} else {
+				/* STREX/LDREX variants and unallocated space */
+				return INSN_REJECTED;
+			}
+
 		} else if ((insn & 0x0e1000d0) == 0x00000d0) {
 			/* STRD/LDRD */
+			if ((insn & 0x0000e000) == 0x0000e000)
+				return INSN_REJECTED;	/* Rd is LR or PC */
+			if (is_writeback(insn) && is_r15(insn, 16))
+				return INSN_REJECTED;	/* Writeback to PC */
+
 			insn &= 0xfff00fff;
 			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
-			if (insn & (1 << 22)) {
-				/* I bit */
+			if (!(insn & (1 << 22))) {
+				/* Register index */
 				insn &= ~0xf;
 				insn |= 1;	/* Rm = r1 */
 			}
@@ -1118,6 +1159,9 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return INSN_GOOD;
 		}
 
+		/* LDRH/STRH/LDRSB/LDRSH */
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		return prep_emulate_ldr_str(insn, asi);
 	}
 
@@ -1125,7 +1169,7 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/*
 	 * ALU op with S bit and Rd == 15 :
-	 * 	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
+	 *	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0e10f000) == 0x0010f000)
 		return INSN_REJECTED;
@@ -1154,22 +1198,61 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		insn |= 0x00000200;     /* Rs = r2 */
 	}
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x01100000) {
+		/*
+		 * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 				emulate_alu_rwflags : emulate_alu_rflags;
+	}
 	return INSN_GOOD;
 }
 
 static enum kprobe_insn __kprobes
 space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* MOVW  : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
+	/* MOVT  : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fb00000) == 0x03000000)
+		return prep_emulate_rd12_modify(insn, asi);
+
+	/* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fff0000) == 0x03200000) {
+		unsigned op2 = insn & 0x000000ff;
+		if (op2 == 0x01 || op2 == 0x04) {
+			/* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
+			/* SEV   : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
+			asi->insn[0] = insn;
+			asi->insn_handler = emulate_none;
+			return INSN_GOOD;
+		} else if (op2 <= 0x03) {
+			/* NOP   : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
+			/* WFE   : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
+			/* WFI   : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
+			/*
+			 * We make WFE and WFI true NOPs to avoid stalls due
+			 * to missing events whilst processing the probe.
+			 */
+			asi->insn_handler = emulate_nop;
+			return INSN_GOOD_NO_SLOT;
+		}
+		/* For DBG and unallocated hints it's safest to reject them */
+		return INSN_REJECTED;
+	}
+
 	/*
 	 * MSR   : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
-	 * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx
 	 * ALU op with S bit and Rd == 15 :
 	 *	   cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0fb00000) == 0x03200000 ||	/* MSR */
-	    (insn & 0x0ff00000) == 0x03400000 ||	/* Undef */
 	    (insn & 0x0e10f000) == 0x0210f000)		/* ALU s-bit, R15  */
 		return INSN_REJECTED;
 
@@ -1180,10 +1263,22 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 * *S (bit 20) updates condition codes
 	 * ADC/SBC/RSC reads the C flag
 	 */
-	insn &= 0xffff0fff;	/* Rd = r0 */
+	insn &= 0xfff00fff;	/* Rn = r0 and Rd = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x03100000) {
+		/*
+		 * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests_imm;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 			emulate_alu_imm_rwflags : emulate_alu_imm_rflags;
+	}
 	return INSN_GOOD;
 }
 
@@ -1192,6 +1287,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */
 	if ((insn & 0x0ff000f0) == 0x068000b0) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 		insn |= 0x00000001;	/* Rm = r1 */
 		asi->insn[0] = insn;
@@ -1205,6 +1302,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */
 	if ((insn & 0x0fa00030) == 0x06a00010 ||
 	    (insn & 0x0fb000f0) == 0x06a00030) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 		asi->insn[0] = insn;
 		asi->insn_handler = emulate_sat;
@@ -1213,57 +1312,101 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* REV    : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
 	/* REV16  : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
+	/* RBIT   : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
 	/* REVSH  : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
 	if ((insn & 0x0ff00070) == 0x06b00030 ||
-	    (insn & 0x0ff000f0) == 0x06f000b0)
+	    (insn & 0x0ff00070) == 0x06f00030)
 		return prep_emulate_rd12rm0(insn, asi);
 
+	/* ???       : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* SADD16    : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* SADDSUBX  : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* SSUBADDX  : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* SSUB16    : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* SADD8     : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SSUB8     : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* QADD16    : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx :   */
 	/* QADDSUBX  : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx :   */
 	/* QSUBADDX  : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx :   */
 	/* QSUB16    : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx :   */
 	/* QADD8     : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx :   */
 	/* QSUB8     : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx :   */
 	/* SHADD16   : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx :   */
 	/* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx :   */
 	/* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx :   */
 	/* SHSUB16   : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SHADD8    : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SHSUB8    : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx :   */
+	/* ???       : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* UADD16    : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* UADDSUBX  : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* USUBADDX  : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* USUB16    : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* UADD8     : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx :   */
 	/* USUB8     : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* UQADD16   : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UQSUB16   : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UQADD8    : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UQSUB8    : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx :   */
 	/* UHADD16   : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UHSUB16   : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UHADD8    : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UHSUB8    : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx :   */
+	if ((insn & 0x0f800010) == 0x06000010) {
+		if ((insn & 0x00300000) == 0x00000000 ||
+		    (insn & 0x000000e0) == 0x000000a0 ||
+		    (insn & 0x000000e0) == 0x000000c0)
+			return INSN_REJECTED;	/* Unallocated space */
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
 	/* PKHBT     : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx :   */
 	/* PKHTB     : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx :   */
+	if ((insn & 0x0ff00030) == 0x06800010)
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
 	/* SXTAB16   : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTB      : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB16    : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTAB     : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB      : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx :   */
 	/* SXTAH     : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTH      : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAB16   : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB16    : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAB     : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB      : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAH     : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
-	return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	/* UXTH      : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx :   */
+	if ((insn & 0x0f8000f0) == 0x06800070) {
+		if ((insn & 0x00300000) == 0x00100000)
+			return INSN_REJECTED;	/* Unallocated space */
+
+		if ((insn & 0x000f0000) == 0x000f0000)
+			return prep_emulate_rd12rm0(insn, asi);
+		else
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
+	/* Other instruction encodings aren't yet defined */
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1273,29 +1416,49 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	if ((insn & 0x0ff000f0) == 0x03f000f0)
 		return INSN_REJECTED;
 
-	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
-	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
-	if ((insn & 0x0ff000f0) == 0x07800010)
-		 return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-
 	/* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
 	/* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
 	if ((insn & 0x0ff00090) == 0x07400010)
 		return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
 
 	/* SMLAD  : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */
+	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
 	/* SMLSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */
+	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
 	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
-	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
+	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx :  */
+	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx :  */
 	if ((insn & 0x0ff00090) == 0x07000010 ||
 	    (insn & 0x0ff000d0) == 0x07500010 ||
-	    (insn & 0x0ff000d0) == 0x075000d0)
+	    (insn & 0x0ff000f0) == 0x07800010) {
+
+		if ((insn & 0x0000f000) == 0x0000f000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+	}
+
+	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	if ((insn & 0x0ff000d0) == 0x075000d0)
 		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 
-	/* SMUSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :  */
-	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
-	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
-	return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+	/* SBFX   : cccc 0111 101x xxxx xxxx xxxx x101 xxxx :  */
+	/* UBFX   : cccc 0111 111x xxxx xxxx xxxx x101 xxxx :  */
+	if ((insn & 0x0fa00070) == 0x07a00050)
+		return prep_emulate_rd12rm0(insn, asi);
+
+	/* BFI    : cccc 0111 110x xxxx xxxx xxxx x001 xxxx :  */
+	/* BFC    : cccc 0111 110x xxxx xxxx xxxx x001 1111 :  */
+	if ((insn & 0x0fe00070) == 0x07c00010) {
+
+		if ((insn & 0x0000000f) == 0x0000000f)
+			return prep_emulate_rd12_modify(insn, asi);
+		else
+			return prep_emulate_rd12rn0_modify(insn, asi);
+	}
+
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1309,6 +1472,10 @@ space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* STRB  : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */
 	/* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
 	/* STRT  : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+
+	if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12))
+		return INSN_REJECTED;	/* LDRB into PC */
+
 	return prep_emulate_ldr_str(insn, asi);
 }
 
@@ -1323,10 +1490,9 @@ space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
 	/* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */
 				simulate_stm1_pc : simulate_ldm1stm1;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
@@ -1334,58 +1500,117 @@ space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* B  : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
 	/* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = simulate_bbl;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
-space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* Coprocessor instructions... */
 	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
 	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	insn &= 0xfff00fff;
-	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-	return INSN_GOOD;
+	/* LDC  : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC  : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP  : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR  : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC  : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+
+	/* SVC  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
+
+	return INSN_REJECTED;
 }
 
-static enum kprobe_insn __kprobes
-space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static unsigned long __kprobes __check_eq(unsigned long cpsr)
 {
-	/* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	insn &= 0xfff0ffff;	/* Rn = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_ldcstc;
-	return INSN_GOOD;
+	return cpsr & PSR_Z_BIT;
 }
 
-static enum kprobe_insn __kprobes
-space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static unsigned long __kprobes __check_ne(unsigned long cpsr)
 {
-	/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-	/* SWI  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfff000f0) == 0xe1200070 ||
-	    (insn & 0x0f000000) == 0x0f000000)
-		return INSN_REJECTED;
+	return (~cpsr) & PSR_Z_BIT;
+}
 
-	/* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0x0f000010) == 0x0e000000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
+static unsigned long __kprobes __check_cs(unsigned long cpsr)
+{
+	return cpsr & PSR_C_BIT;
+}
 
-	/* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+static unsigned long __kprobes __check_cc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_mi(unsigned long cpsr)
+{
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_pl(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_vs(unsigned long cpsr)
+{
+	return cpsr & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_vc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_hi(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return cpsr & PSR_C_BIT;
 }
 
+static unsigned long __kprobes __check_ls(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ge(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_lt(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_gt(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return (~temp) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_le(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return temp & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_al(unsigned long cpsr)
+{
+	return true;
+}
+
+static kprobe_check_cc * const condition_checks[16] = {
+	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
+	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
+	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
+	&__check_gt, &__check_le, &__check_al, &__check_al
+};
+
 /* Return:
  *   INSN_REJECTED     If instruction is one not allowed to kprobe,
  *   INSN_GOOD         If instruction is supported and uses instruction slot,
@@ -1401,133 +1626,45 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 enum kprobe_insn __kprobes
 arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	asi->insn_check_cc = condition_checks[insn>>28];
 	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
 
-	if ((insn & 0xf0000000) == 0xf0000000) {
+	if ((insn & 0xf0000000) == 0xf0000000)
 
 		return space_1111(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x00000000) {
+	else if ((insn & 0x0e000000) == 0x00000000)
 
 		return space_cccc_000x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x02000000) {
+	else if ((insn & 0x0e000000) == 0x02000000)
 
 		return space_cccc_001x(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x06000010) {
+	else if ((insn & 0x0f000010) == 0x06000010)
 
 		return space_cccc_0110__1(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x07000010) {
+	else if ((insn & 0x0f000010) == 0x07000010)
 
 		return space_cccc_0111__1(insn, asi);
 
-	} else if ((insn & 0x0c000000) == 0x04000000) {
+	else if ((insn & 0x0c000000) == 0x04000000)
 
 		return space_cccc_01xx(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x08000000) {
+	else if ((insn & 0x0e000000) == 0x08000000)
 
 		return space_cccc_100x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x0a000000) {
+	else if ((insn & 0x0e000000) == 0x0a000000)
 
 		return space_cccc_101x(insn, asi);
 
-	} else if ((insn & 0x0fe00000) == 0x0c400000) {
-
-		return space_cccc_1100_010x(insn, asi);
-
-	} else if ((insn & 0x0e000000) == 0x0c000000) {
-
-		return space_cccc_110x(insn, asi);
-
-	}
-
-	return space_cccc_111x(insn, asi);
+	return space_cccc_11xx(insn, asi);
 }
 
 void __init arm_kprobe_decode_init(void)
 {
 	find_str_pc_offset();
 }
-
-
-/*
- * All ARM instructions listed below.
- *
- * Instructions and their general purpose registers are given.
- * If a particular register may not use R15, it is prefixed with a "!".
- * If marked with a "*" means the value returned by reading R15
- * is implementation defined.
- *
- * ADC/ADD/AND/BIC/CMN/CMP/EOR/MOV/MVN/ORR/RSB/RSC/SBC/SUB/TEQ
- *     TST: Rd, Rn, Rm, !Rs
- * BX: Rm
- * BLX(2): !Rm
- * BX: Rm (R15 legal, but discouraged)
- * BXJ: !Rm,
- * CLZ: !Rd, !Rm
- * CPY: Rd, Rm
- * LDC/2,STC/2 immediate offset & unindex: Rn
- * LDC/2,STC/2 immediate pre/post-indexed: !Rn
- * LDM(1/3): !Rn, register_list
- * LDM(2): !Rn, !register_list
- * LDR,STR,PLD immediate offset: Rd, Rn
- * LDR,STR,PLD register offset: Rd, Rn, !Rm
- * LDR,STR,PLD scaled register offset: Rd, !Rn, !Rm
- * LDR,STR immediate pre/post-indexed: Rd, !Rn
- * LDR,STR register pre/post-indexed: Rd, !Rn, !Rm
- * LDR,STR scaled register pre/post-indexed: Rd, !Rn, !Rm
- * LDRB,STRB immediate offset: !Rd, Rn
- * LDRB,STRB register offset: !Rd, Rn, !Rm
- * LDRB,STRB scaled register offset: !Rd, !Rn, !Rm
- * LDRB,STRB immediate pre/post-indexed: !Rd, !Rn
- * LDRB,STRB register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRB,STRB scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT immediate pre/post-indexed: !Rd, !Rn
- * LDRT,LDRBT,STRBT register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate offset: !Rd, Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register offset: !Rd, Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate pre/post-indexed: !Rd, !Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register pre/post-indexed: !Rd, !Rn, !Rm
- * LDREX: !Rd, !Rn
- * MCR/2: !Rd
- * MCRR/2,MRRC/2: !Rd, !Rn
- * MLA: !Rd, !Rn, !Rm, !Rs
- * MOV: Rd
- * MRC/2: !Rd (if Rd==15, only changes cond codes, not the register)
- * MRS,MSR: !Rd
- * MUL: !Rd, !Rm, !Rs
- * PKH{BT,TB}: !Rd, !Rn, !Rm
- * QDADD,[U]QADD/16/8/SUBX: !Rd, !Rm, !Rn
- * QDSUB,[U]QSUB/16/8/ADDX: !Rd, !Rm, !Rn
- * REV/16/SH: !Rd, !Rm
- * RFE: !Rn
- * {S,U}[H]ADD{16,8,SUBX},{S,U}[H]SUB{16,8,ADDX}: !Rd, !Rn, !Rm
- * SEL: !Rd, !Rn, !Rm
- * SMLA<x><y>,SMLA{D,W<y>},SMLSD,SMML{A,S}: !Rd, !Rn, !Rm, !Rs
- * SMLAL<x><y>,SMLA{D,LD},SMLSLD,SMMULL,SMULW<y>: !RdHi, !RdLo, !Rm, !Rs
- * SMMUL,SMUAD,SMUL<x><y>,SMUSD: !Rd, !Rm, !Rs
- * SSAT/16: !Rd, !Rm
- * STM(1/2): !Rn, register_list* (R15 in reg list not recommended)
- * STRT immediate pre/post-indexed: Rd*, !Rn
- * STRT register pre/post-indexed: Rd*, !Rn, !Rm
- * STRT scaled register pre/post-indexed: Rd*, !Rn, !Rm
- * STREX: !Rd, !Rn, !Rm
- * SWP/B: !Rd, !Rn, !Rm
- * {S,U}XTA{B,B16,H}: !Rd, !Rn, !Rm
- * {S,U}XT{B,B16,H}: !Rd, !Rm
- * UM{AA,LA,UL}L: !RdHi, !RdLo, !Rm, !Rs
- * USA{D8,A8,T,T16}: !Rd, !Rm, !Rs
- *
- * May transfer control by writing R15 (possible mode changes or alternate
- * mode accesses marked by "*"):
- * ALU op (* with s-bit), B, BL, BKPT, BLX(1/2), BX, BXJ, CPS*, CPY,
- * LDM(1), LDM(2/3)*, LDR, MOV, RFE*, SWI*
- *
- * Instructions that do not take general registers, nor transfer control:
- * CDP/2, SETEND, SRS*
- */
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 2ba7deb3072e..1656c87501c0 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -134,7 +134,8 @@ static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs,
 				 struct kprobe_ctlblk *kcb)
 {
 	regs->ARM_pc += 4;
-	p->ainsn.insn_handler(p, regs);
+	if (p->ainsn.insn_check_cc(regs->ARM_cpsr))
+		p->ainsn.insn_handler(p, regs);
 }
 
 /*
diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c
index 31a316c1777b..0f107dcb0347 100644
--- a/arch/arm/kernel/leds.c
+++ b/arch/arm/kernel/leds.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/leds.h>
 
@@ -69,36 +70,37 @@ static ssize_t leds_store(struct sys_device *dev,
 
 static SYSDEV_ATTR(event, 0200, NULL, leds_store);
 
-static int leds_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_class leds_sysclass = {
+	.name		= "leds",
+};
+
+static struct sys_device leds_device = {
+	.id		= 0,
+	.cls		= &leds_sysclass,
+};
+
+static int leds_suspend(void)
 {
 	leds_event(led_stop);
 	return 0;
 }
 
-static int leds_resume(struct sys_device *dev)
+static void leds_resume(void)
 {
 	leds_event(led_start);
-	return 0;
 }
 
-static int leds_shutdown(struct sys_device *dev)
+static void leds_shutdown(void)
 {
 	leds_event(led_halted);
-	return 0;
 }
 
-static struct sysdev_class leds_sysclass = {
-	.name		= "leds",
+static struct syscore_ops leds_syscore_ops = {
 	.shutdown	= leds_shutdown,
 	.suspend	= leds_suspend,
 	.resume		= leds_resume,
 };
 
-static struct sys_device leds_device = {
-	.id		= 0,
-	.cls		= &leds_sysclass,
-};
-
 static int __init leds_init(void)
 {
 	int ret;
@@ -107,6 +109,8 @@ static int __init leds_init(void)
 		ret = sysdev_register(&leds_device);
 	if (ret == 0)
 		ret = sysdev_create_file(&leds_device, &attr_event);
+	if (ret == 0)
+		register_syscore_ops(&leds_syscore_ops);
 	return ret;
 }
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 69cfee0fe00f..139e3c827369 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -221,7 +221,7 @@ again:
 	prev_raw_count &= armpmu->max_period;
 
 	if (overflow)
-		delta = armpmu->max_period - prev_raw_count + new_raw_count;
+		delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
 	else
 		delta = new_raw_count - prev_raw_count;
 
@@ -746,7 +746,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while (tail && !((unsigned long)tail & 0x3))
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 94bbedbed639..5e1e54197227 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -372,6 +372,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	if (clone_flags & CLONE_SETTLS)
 		thread->tp_value = regs->ARM_r3;
 
+	thread_notify(THREAD_NOTIFY_COPY, thread);
+
 	return 0;
 }
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2bf27f364d09..8182f45ca493 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -767,12 +767,20 @@ long arch_ptrace(struct task_struct *child, long request,
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		case PTRACE_GETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_gethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 		case PTRACE_SETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_sethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 #endif
 
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index cb8398317644..0340224cf73c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -597,19 +597,13 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
 	return err;
 }
 
-static inline void setup_syscall_restart(struct pt_regs *regs)
-{
-	regs->ARM_r0 = regs->ARM_ORIG_r0;
-	regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
-}
-
 /*
  * OK, we're invoking a handler
  */	
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset,
-	      struct pt_regs * regs, int syscall)
+	      struct pt_regs * regs)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
@@ -617,26 +611,6 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	int ret;
 
 	/*
-	 * If we were from a system call, check for system call restarting...
-	 */
-	if (syscall) {
-		switch (regs->ARM_r0) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->ARM_r0 = -EINTR;
-			break;
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->ARM_r0 = -EINTR;
-				break;
-			}
-			/* fallthrough */
-		case -ERESTARTNOINTR:
-			setup_syscall_restart(regs);
-		}
-	}
-
-	/*
 	 * translate the signal
 	 */
 	if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
@@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
  */
 static void do_signal(struct pt_regs *regs, int syscall)
 {
+	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
 	struct k_sigaction ka;
 	siginfo_t info;
 	int signr;
@@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	if (!user_mode(regs))
 		return;
 
+	/*
+	 * If we were from a system call, check for system call restarting...
+	 */
+	if (syscall) {
+		continue_addr = regs->ARM_pc;
+		restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4);
+		retval = regs->ARM_r0;
+
+		/*
+		 * Prepare for system call restart.  We do this here so that a
+		 * debugger will see the already changed PC.
+		 */
+		switch (retval) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->ARM_r0 = regs->ARM_ORIG_r0;
+			regs->ARM_pc = restart_addr;
+			break;
+		case -ERESTART_RESTARTBLOCK:
+			regs->ARM_r0 = -EINTR;
+			break;
+		}
+	}
+
 	if (try_to_freeze())
 		goto no_signal;
 
+	/*
+	 * Get the signal to deliver.  When running under ptrace, at this
+	 * point the debugger may change all our registers ...
+	 */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		sigset_t *oldset;
 
+		/*
+		 * Depending on the signal settings we may need to revert the
+		 * decision to restart the system call.  But skip this if a
+		 * debugger has chosen to restart at a different PC.
+		 */
+		if (regs->ARM_pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND
+			    || (retval == -ERESTARTSYS
+				&& !(ka.sa.sa_flags & SA_RESTART))) {
+				regs->ARM_r0 = -EINTR;
+				regs->ARM_pc = continue_addr;
+			}
+		}
+
 		if (test_thread_flag(TIF_RESTORE_SIGMASK))
 			oldset = &current->saved_sigmask;
 		else
 			oldset = &current->blocked;
-		if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) {
+		if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
@@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	}
 
  no_signal:
-	/*
-	 * No signal to deliver to the process - restart the syscall.
-	 */
 	if (syscall) {
-		if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
+		/*
+		 * Handle restarting a different system call.  As above,
+		 * if a debugger has chosen to restart at a different PC,
+		 * ignore the restart.
+		 */
+		if (retval == -ERESTART_RESTARTBLOCK
+		    && regs->ARM_pc == continue_addr) {
 			if (thumb_mode(regs)) {
 				regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE;
 				regs->ARM_pc -= 2;
@@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall)
 #endif
 			}
 		}
-		if (regs->ARM_r0 == -ERESTARTNOHAND ||
-		    regs->ARM_r0 == -ERESTARTSYS ||
-		    regs->ARM_r0 == -ERESTARTNOINTR) {
-			setup_syscall_restart(regs);
-		}
 
 		/* If there's no signal to deliver, we just put the saved sigmask
 		 * back.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8fe05ad932e4..007a0a950e75 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -479,7 +479,7 @@ static void broadcast_timer_set_mode(enum clock_event_mode mode,
 {
 }
 
-static void broadcast_timer_setup(struct clock_event_device *evt)
+static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt)
 {
 	evt->name	= "dummy_timer";
 	evt->features	= CLOCK_EVT_FEAT_ONESHOT |
@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 		break;
 
 	case IPI_RESCHEDULE:
-		/*
-		 * nothing more to do - eveything is
-		 * done on the interrupt return path
-		 */
+		scheduler_ipi();
 		break;
 
 	case IPI_CALL_FUNC:
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 4ad8da15ef2b..af0aaebf4de6 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -311,7 +311,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 	long err;
 	int i;
 
-	if (nsops < 1)
+	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 1ff46cabc7ef..cb634c3e28e9 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -21,7 +21,7 @@
 #include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/profile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/timer.h>
 #include <linux/irq.h>
 
@@ -115,48 +115,37 @@ void timer_tick(void)
 #endif
 
 #if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS)
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
+static int timer_suspend(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->suspend != NULL)
-		timer->suspend();
+	if (system_timer->suspend)
+		system_timer->suspend();
 
 	return 0;
 }
 
-static int timer_resume(struct sys_device *dev)
+static void timer_resume(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->resume != NULL)
-		timer->resume();
-
-	return 0;
+	if (system_timer->resume)
+		system_timer->resume();
 }
 #else
 #define timer_suspend NULL
 #define timer_resume NULL
 #endif
 
-static struct sysdev_class timer_sysclass = {
-	.name		= "timer",
+static struct syscore_ops timer_syscore_ops = {
 	.suspend	= timer_suspend,
 	.resume		= timer_resume,
 };
 
-static int __init timer_init_sysfs(void)
+static int __init timer_init_syscore_ops(void)
 {
-	int ret = sysdev_class_register(&timer_sysclass);
-	if (ret == 0) {
-		system_timer->dev.cls = &timer_sysclass;
-		ret = sysdev_register(&system_timer->dev);
-	}
+	register_syscore_ops(&timer_syscore_ops);
 
-	return ret;
+	return 0;
 }
 
-device_initcall(timer_init_sysfs);
+device_initcall(timer_init_syscore_ops);
 
 void __init time_init(void)
 {
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index f0000e188c8c..d52eec268b47 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
@@ -410,8 +409,7 @@ static int bad_syscall(int n, struct pt_regs *regs)
 	struct thread_info *thread = current_thread_info();
 	siginfo_t info;
 
-	if (current->personality != PER_LINUX &&
-	    current->personality != PER_LINUX_32BIT &&
+	if ((current->personality & PER_MASK) != PER_LINUX &&
 	    thread->exec_domain->handler) {
 		thread->exec_domain->handler(n, regs);
 		return regs->ARM_r0;
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 19390231a0e9..2d299bf5d72f 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -83,6 +83,7 @@ config ARCH_AT91CAP9
 	select CPU_ARM926T
 	select GENERIC_CLOCKEVENTS
 	select HAVE_FB_ATMEL
+	select HAVE_NET_MACB
 
 config ARCH_AT572D940HF
 	bool "AT572D940HF"
diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c
index 1f9d3cb64c50..d8df59a3426d 100644
--- a/arch/arm/mach-at91/board-eb01.c
+++ b/arch/arm/mach-at91/board-eb01.c
@@ -30,6 +30,11 @@
 #include <mach/board.h>
 #include "generic.h"
 
+static void __init at91eb01_init_irq(void)
+{
+	at91x40_init_interrupts(NULL);
+}
+
 static void __init at91eb01_map_io(void)
 {
 	at91x40_initialize(40000000);
@@ -38,7 +43,7 @@ static void __init at91eb01_map_io(void)
 MACHINE_START(AT91EB01, "Atmel AT91 EB01")
 	/* Maintainer: Greg Ungerer <gerg@snapgear.com> */
 	.timer		= &at91x40_timer,
-	.init_irq	= at91x40_init_interrupts,
+	.init_irq	= at91eb01_init_irq,
 	.map_io		= at91eb01_map_io,
 MACHINE_END
 
diff --git a/arch/arm/mach-at91/include/mach/cpu.h b/arch/arm/mach-at91/include/mach/cpu.h
index 3bef931d0b1c..0700f2125305 100644
--- a/arch/arm/mach-at91/include/mach/cpu.h
+++ b/arch/arm/mach-at91/include/mach/cpu.h
@@ -27,6 +27,7 @@
 #define ARCH_ID_AT91SAM9G45	0x819b05a0
 #define ARCH_ID_AT91SAM9G45MRL	0x819b05a2	/* aka 9G45-ES2 & non ES lots */
 #define ARCH_ID_AT91SAM9G45ES	0x819b05a1	/* 9G45-ES (Engineering Sample) */
+#define ARCH_ID_AT91SAM9X5	0x819a05a0
 #define ARCH_ID_AT91CAP9	0x039A03A0
 
 #define ARCH_ID_AT91SAM9XE128	0x329973a0
@@ -55,6 +56,12 @@ static inline unsigned long at91_cpu_fully_identify(void)
 #define ARCH_EXID_AT91SAM9G46	0x00000003
 #define ARCH_EXID_AT91SAM9G45	0x00000004
 
+#define ARCH_EXID_AT91SAM9G15	0x00000000
+#define ARCH_EXID_AT91SAM9G35	0x00000001
+#define ARCH_EXID_AT91SAM9X35	0x00000002
+#define ARCH_EXID_AT91SAM9G25	0x00000003
+#define ARCH_EXID_AT91SAM9X25	0x00000004
+
 static inline unsigned long at91_exid_identify(void)
 {
 	return at91_sys_read(AT91_DBGU_EXID);
@@ -143,6 +150,27 @@ static inline unsigned long at91cap9_rev_identify(void)
 #define cpu_is_at91sam9m11()	(0)
 #endif
 
+#ifdef CONFIG_ARCH_AT91SAM9X5
+#define cpu_is_at91sam9x5()	(at91_cpu_identify() == ARCH_ID_AT91SAM9X5)
+#define cpu_is_at91sam9g15()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G15))
+#define cpu_is_at91sam9g35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G35))
+#define cpu_is_at91sam9x35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X35))
+#define cpu_is_at91sam9g25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G25))
+#define cpu_is_at91sam9x25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X25))
+#else
+#define cpu_is_at91sam9x5()	(0)
+#define cpu_is_at91sam9g15()	(0)
+#define cpu_is_at91sam9g35()	(0)
+#define cpu_is_at91sam9x35()	(0)
+#define cpu_is_at91sam9g25()	(0)
+#define cpu_is_at91sam9x25()	(0)
+#endif
+
 #ifdef CONFIG_ARCH_AT91CAP9
 #define cpu_is_at91cap9()	(at91_cpu_identify() == ARCH_ID_AT91CAP9)
 #define cpu_is_at91cap9_revB()	(at91cap9_rev_identify() == ARCH_REVISION_CAP9_B)
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 32f147998cd9..c0deacae778d 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -63,6 +63,7 @@ config MACH_DAVINCI_EVM
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM644x EVM
@@ -72,6 +73,7 @@ config MACH_SFFSDR
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Lyrtech Small Form Factor
 	  Software Defined Radio (SFFSDR) board.
@@ -105,6 +107,7 @@ config MACH_DAVINCI_DM6467_EVM
 	select MACH_DAVINCI_DM6467TEVM
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM6467 EVM
@@ -118,6 +121,7 @@ config MACH_DAVINCI_DM365_EVM
 	depends on ARCH_DAVINCI_DM365
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify whether the board used
 	  for development is a DM365 EVM
@@ -129,6 +133,7 @@ config MACH_DAVINCI_DA830_EVM
 	select GPIO_PCF857X
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the TI DA830/OMAP-L137/AM17x Evaluation Module.
 
@@ -205,6 +210,7 @@ config MACH_MITYOMAPL138
 	depends on ARCH_DAVINCI_DA850
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Critical Link MityDSP-L138/MityARM-1808
 	  System on Module.  Information on this SoM may be found at
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 2aa79c54f98e..606a6f27ed6c 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -29,7 +29,7 @@
 #include <mach/mux.h>
 #include <mach/spi.h>
 
-#define MITYOMAPL138_PHY_ID		"0:03"
+#define MITYOMAPL138_PHY_ID		""
 
 #define FACTORY_CONFIG_MAGIC	0x012C0138
 #define FACTORY_CONFIG_VERSION	0x00010001
@@ -414,7 +414,7 @@ static struct resource mityomapl138_nandflash_resource[] = {
 
 static struct platform_device mityomapl138_nandflash_device = {
 	.name		= "davinci_nand",
-	.id		= 0,
+	.id		= 1,
 	.dev		= {
 		.platform_data	= &mityomapl138_nandflash_data,
 	},
diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 0a95be1512bb..41669ecc1f91 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return ret;
 
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,
-			dev_driver_string(cpufreq.dev),
-			"transition: %u --> %u\n", freqs.old, freqs.new);
+	dev_dbg(cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new);
 
 	ret = cpufreq_frequency_table_target(policy, pdata->freq_table,
 						freqs.new, relation, &idx);
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 625d4b66718b..58a02dc7b15a 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -39,7 +39,8 @@
 #define DA8XX_GPIO_BASE			0x01e26000
 #define DA8XX_I2C1_BASE			0x01e28000
 #define DA8XX_SPI0_BASE			0x01c41000
-#define DA8XX_SPI1_BASE			0x01f0e000
+#define DA830_SPI1_BASE			0x01e12000
+#define DA850_SPI1_BASE			0x01f0e000
 
 #define DA8XX_EMAC_CTRL_REG_OFFSET	0x3000
 #define DA8XX_EMAC_MOD_REG_OFFSET	0x2000
@@ -762,8 +763,8 @@ static struct resource da8xx_spi0_resources[] = {
 
 static struct resource da8xx_spi1_resources[] = {
 	[0] = {
-		.start	= DA8XX_SPI1_BASE,
-		.end	= DA8XX_SPI1_BASE + SZ_4K - 1,
+		.start	= DA830_SPI1_BASE,
+		.end	= DA830_SPI1_BASE + SZ_4K - 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -832,5 +833,10 @@ int __init da8xx_register_spi(int instance, struct spi_board_info *info,
 
 	da8xx_spi_pdata[instance].num_chipselect = len;
 
+	if (instance == 1 && cpu_is_davinci_da850()) {
+		da8xx_spi1_resources[0].start = DA850_SPI1_BASE;
+		da8xx_spi1_resources[0].end = DA850_SPI1_BASE + SZ_4K - 1;
+	}
+
 	return platform_device_register(&da8xx_spi_device[instance]);
 }
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index f68012239641..a3a94e9c9378 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -314,7 +314,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk timer3_clk = {
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 5f8a65424184..4c82c2716293 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -274,7 +274,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk_lookup dm644x_clks[] = {
diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S
index 9f1befc5ac38..f8b7ea4f6235 100644
--- a/arch/arm/mach-davinci/include/mach/debug-macro.S
+++ b/arch/arm/mach-davinci/include/mach/debug-macro.S
@@ -24,6 +24,9 @@
 
 #define UART_SHIFT	2
 
+#define davinci_uart_v2p(x)	((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET)
+#define davinci_uart_p2v(x)	((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET)
+
 		.pushsection .data
 davinci_uart_phys:	.word	0
 davinci_uart_virt:	.word	0
@@ -34,7 +37,7 @@ davinci_uart_virt:	.word	0
 		/* Use davinci_uart_phys/virt if already configured */
 10:		mrc	p15, 0, \rp, c1, c0
 		tst	\rp, #1			@ MMU enabled?
-		ldreq	\rp, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rp, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rp, =davinci_uart_phys
 		add	\rv, \rp, #4		@ davinci_uart_virt
 		ldr	\rp, [\rp, #0]
@@ -48,18 +51,18 @@ davinci_uart_virt:	.word	0
 		tst	\rp, #1			@ MMU enabled?
 
 		/* Copy uart phys address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rv, =davinci_uart_phys
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #0]
 		str	\rp, [\rv]
 
 		/* Copy uart virt address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_virt)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_virt)
 		ldrne	\rv, =davinci_uart_virt
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #4]
 		str	\rp, [\rv]
 
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index 8051110b8ac3..c9e6ce185a66 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -22,7 +22,7 @@
  *
  * This area sits just below the page tables (see arch/arm/kernel/head.S).
  */
-#define DAVINCI_UART_INFO	(PHYS_OFFSET + 0x3ff8)
+#define DAVINCI_UART_INFO	(PLAT_PHYS_OFFSET + 0x3ff8)
 
 #define DAVINCI_UART0_BASE	(IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c
index 10d917d9e3ad..8755ca8dd48d 100644
--- a/arch/arm/mach-exynos4/pm.c
+++ b/arch/arm/mach-exynos4/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -372,7 +373,27 @@ void exynos4_scu_enable(void __iomem *scu_base)
 	flush_cache_all();
 }
 
-static int exynos4_pm_resume(struct sys_device *dev)
+static struct sysdev_driver exynos4_pm_driver = {
+	.add		= exynos4_pm_add,
+};
+
+static __init int exynos4_pm_drvinit(void)
+{
+	unsigned int tmp;
+
+	s3c_pm_init();
+
+	/* All wakeup disable */
+
+	tmp = __raw_readl(S5P_WAKEUP_MASK);
+	tmp |= ((0xFF << 8) | (0x1F << 1));
+	__raw_writel(tmp, S5P_WAKEUP_MASK);
+
+	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+}
+arch_initcall(exynos4_pm_drvinit);
+
+static void exynos4_pm_resume(void)
 {
 	/* For release retention */
 
@@ -394,27 +415,15 @@ static int exynos4_pm_resume(struct sys_device *dev)
 	/* enable L2X0*/
 	writel_relaxed(1, S5P_VA_L2CC + L2X0_CTRL);
 #endif
-
-	return 0;
 }
 
-static struct sysdev_driver exynos4_pm_driver = {
-	.add		= exynos4_pm_add,
+static struct syscore_ops exynos4_pm_syscore_ops = {
 	.resume		= exynos4_pm_resume,
 };
 
-static __init int exynos4_pm_drvinit(void)
+static __init int exynos4_pm_syscore_init(void)
 {
-	unsigned int tmp;
-
-	s3c_pm_init();
-
-	/* All wakeup disable */
-
-	tmp = __raw_readl(S5P_WAKEUP_MASK);
-	tmp |= ((0xFF << 8) | (0x1F << 1));
-	__raw_writel(tmp, S5P_WAKEUP_MASK);
-
-	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+	register_syscore_ops(&exynos4_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(exynos4_pm_drvinit);
+arch_initcall(exynos4_pm_syscore_init);
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index bdd257921cfb..46adca068f2c 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -4,6 +4,7 @@ menu "Footbridge Implementations"
 
 config ARCH_CATS
 	bool "CATS"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
@@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST
 
 config ARCH_NETWINDER
 	bool "NetWinder"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index 441c6ce0d555..7020f1a3feca 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -10,53 +10,16 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/spinlock.h>
 #include <linux/timex.h>
 
 #include <asm/irq.h>
-
+#include <asm/i8253.h>
 #include <asm/mach/time.h>
 
 #include "common.h"
 
-#define PIT_MODE	0x43
-#define PIT_CH0		0x40
-
-#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
-
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	static int old_count;
-	static u32 old_jifs;
-	int count;
-	u32 jifs;
-
-	raw_local_irq_save(flags);
-
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);		/* latch the count */
-	count = inb_p(PIT_CH0);		/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_local_irq_restore(flags);
-
-	count = (PIT_LATCH - 1) - count;
-
-	return (cycle_t)(jifs * PIT_LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-};
+DEFINE_RAW_SPINLOCK(i8253_lock);
 
 static void pit_set_mode(enum clock_event_mode mode,
 	struct clock_event_device *evt)
@@ -121,7 +84,7 @@ static void __init isa_timer_init(void)
 	pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce);
 	pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce);
 
-	clocksource_register_hz(&pit_cs, PIT_TICK_RATE);
+	clocksource_i8253_init();
 
 	setup_irq(pit_ce.irq, &pit_timer_irq);
 	clockevents_register_device(&pit_ce);
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 980803ff348c..d3e96451529c 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -24,7 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/clocksource.h>
@@ -180,13 +180,13 @@ static void __init ap_init_irq(void)
 #ifdef CONFIG_PM
 static unsigned long ic_irq_enable;
 
-static int irq_suspend(struct sys_device *dev, pm_message_t state)
+static int irq_suspend(void)
 {
 	ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE);
 	return 0;
 }
 
-static int irq_resume(struct sys_device *dev)
+static void irq_resume(void)
 {
 	/* disable all irq sources */
 	writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR);
@@ -194,33 +194,25 @@ static int irq_resume(struct sys_device *dev)
 	writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR);
 
 	writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET);
-	return 0;
 }
 #else
 #define irq_suspend NULL
 #define irq_resume NULL
 #endif
 
-static struct sysdev_class irq_class = {
-	.name		= "irq",
+static struct syscore_ops irq_syscore_ops = {
 	.suspend	= irq_suspend,
 	.resume		= irq_resume,
 };
 
-static struct sys_device irq_device = {
-	.id	= 0,
-	.cls	= &irq_class,
-};
-
-static int __init irq_init_sysfs(void)
+static int __init irq_syscore_init(void)
 {
-	int ret = sysdev_class_register(&irq_class);
-	if (ret == 0)
-		ret = sysdev_register(&irq_device);
-	return ret;
+	register_syscore_ops(&irq_syscore_ops);
+
+	return 0;
 }
 
-device_initcall(irq_init_sysfs);
+device_initcall(irq_syscore_init);
 
 /*
  * Flash handling.
diff --git a/arch/arm/mach-mmp/include/mach/gpio.h b/arch/arm/mach-mmp/include/mach/gpio.h
index ee8b02ed8011..7bfb827f3fe3 100644
--- a/arch/arm/mach-mmp/include/mach/gpio.h
+++ b/arch/arm/mach-mmp/include/mach/gpio.h
@@ -10,7 +10,7 @@
 #define BANK_OFF(n)	(((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2))
 #define GPIO_REG(x)	(*((volatile u32 *)(GPIO_REGS_VIRT + (x))))
 
-#define NR_BUILTIN_GPIO	(192)
+#define NR_BUILTIN_GPIO		IRQ_GPIO_NUM
 
 #define gpio_to_bank(gpio)	((gpio) >> 5)
 #define gpio_to_irq(gpio)	(IRQ_GPIO_START + (gpio))
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
index 4621067c7720..713be155a44d 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -8,6 +8,15 @@
 #define MFP_DRIVE_MEDIUM	(0x2 << 13)
 #define MFP_DRIVE_FAST		(0x3 << 13)
 
+#undef MFP_CFG
+#undef MFP_CFG_DRV
+
+#define MFP_CFG(pin, af)		\
+	(MFP_LPM_INPUT | MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_DRIVE_MEDIUM)
+
+#define MFP_CFG_DRV(pin, af, drv)	\
+	(MFP_LPM_INPUT | MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_DRIVE_##drv)
+
 /* GPIO */
 #define GPIO0_GPIO		MFP_CFG(GPIO0, AF5)
 #define GPIO1_GPIO		MFP_CFG(GPIO1, AF5)
diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c
index 7f568611547e..6a96911b0ad5 100644
--- a/arch/arm/mach-msm/board-qsd8x50.c
+++ b/arch/arm/mach-msm/board-qsd8x50.c
@@ -160,10 +160,7 @@ static struct msm_mmc_platform_data qsd8x50_sdc1_data = {
 
 static void __init qsd8x50_init_mmc(void)
 {
-	if (machine_is_qsd8x50_ffa() || machine_is_qsd8x50a_ffa())
-		vreg_mmc = vreg_get(NULL, "gp6");
-	else
-		vreg_mmc = vreg_get(NULL, "gp5");
+	vreg_mmc = vreg_get(NULL, "gp5");
 
 	if (IS_ERR(vreg_mmc)) {
 		pr_err("vreg get for vreg_mmc failed (%ld)\n",
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 56f920c55b6a..38b95e949d13 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -269,7 +269,7 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 
 	/* Use existing clock_event for cpu 0 */
 	if (!smp_processor_id())
-		return;
+		return 0;
 
 	writel(DGT_CLK_CTL_DIV_4, MSM_TMR_BASE + DGT_CLK_CTL);
 
diff --git a/arch/arm/mach-mx3/mach-vpr200.c b/arch/arm/mach-mx3/mach-vpr200.c
index 2cf390fbd980..47a69cbc31a8 100644
--- a/arch/arm/mach-mx3/mach-vpr200.c
+++ b/arch/arm/mach-mx3/mach-vpr200.c
@@ -257,11 +257,16 @@ static const struct fsl_usb2_platform_data otg_device_pdata __initconst = {
 	.workaround	= FLS_USB2_WORKAROUND_ENGCM09152,
 };
 
+static int vpr200_usbh_init(struct platform_device *pdev)
+{
+	return mx35_initialize_usb_hw(pdev->id,
+			MXC_EHCI_INTERFACE_SINGLE_UNI | MXC_EHCI_INTERNAL_PHY);
+}
+
 /* USB HOST config */
 static const struct mxc_usbh_platform_data usb_host_pdata __initconst = {
-	.portsc		= MXC_EHCI_MODE_SERIAL,
-	.flags		= MXC_EHCI_INTERFACE_SINGLE_UNI |
-			  MXC_EHCI_INTERNAL_PHY,
+	.init = vpr200_usbh_init,
+	.portsc = MXC_EHCI_MODE_SERIAL,
 };
 
 static struct platform_device *devices[] __initdata = {
diff --git a/arch/arm/mach-mx5/board-mx53_loco.c b/arch/arm/mach-mx5/board-mx53_loco.c
index 10a1bea10548..6206b1191fe8 100644
--- a/arch/arm/mach-mx5/board-mx53_loco.c
+++ b/arch/arm/mach-mx5/board-mx53_loco.c
@@ -193,7 +193,7 @@ static iomux_v3_cfg_t mx53_loco_pads[] = {
 	.wakeup		= wake,					\
 }
 
-static const struct gpio_keys_button loco_buttons[] __initconst = {
+static struct gpio_keys_button loco_buttons[] = {
 	GPIO_BUTTON(MX53_LOCO_POWER, KEY_POWER, 1, "power", 0),
 	GPIO_BUTTON(MX53_LOCO_UI1, KEY_VOLUMEUP, 1, "volume-up", 0),
 	GPIO_BUTTON(MX53_LOCO_UI2, KEY_VOLUMEDOWN, 1, "volume-down", 0),
diff --git a/arch/arm/mach-mxs/clock-mx28.c b/arch/arm/mach-mxs/clock-mx28.c
index 1ad97fed1e94..5dcc59d5b9ec 100644
--- a/arch/arm/mach-mxs/clock-mx28.c
+++ b/arch/arm/mach-mxs/clock-mx28.c
@@ -295,11 +295,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 	unsigned long diff, parent_rate, calc_rate;			\
 	int i;								\
 									\
-	parent_rate = clk_get_rate(clk->parent);			\
 	div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV;	\
 	bm_busy = BM_CLKCTRL_##dr##_BUSY;				\
 									\
 	if (clk->parent == &ref_xtal_clk) {				\
+		parent_rate = clk_get_rate(clk->parent);		\
 		div = DIV_ROUND_UP(parent_rate, rate);			\
 		if (clk == &cpu_clk) {					\
 			div_max = BM_CLKCTRL_CPU_DIV_XTAL >>		\
@@ -309,6 +309,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 		if (div == 0 || div > div_max)				\
 			return -EINVAL;					\
 	} else {							\
+		/*							\
+		 * hack alert: this block modifies clk->parent, too,	\
+		 * so the base to use is the grandparent.		\
+		 */							\
+		parent_rate = clk_get_rate(clk->parent->parent);	\
 		rate >>= PARENT_RATE_SHIFT;				\
 		parent_rate >>= PARENT_RATE_SHIFT;			\
 		diff = parent_rate;					\
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 6588c22b8a64..fe31d933f0ed 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -24,75 +24,50 @@
 #ifdef CONFIG_PM_RUNTIME
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-	int ret = 0;
+	int ret;
 
 	dev_dbg(dev, "%s\n", __func__);
 
 	ret = pm_generic_runtime_suspend(dev);
+	if (ret)
+		return ret;
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_disable(fclk);
-		clk_put(fclk);
-	}
-
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_disable(iclk);
-		clk_put(iclk);
+	ret = pm_runtime_clk_suspend(dev);
+	if (ret) {
+		pm_generic_runtime_resume(dev);
+		return ret;
 	}
 
 	return 0;
-};
+}
 
 static int omap1_pm_runtime_resume(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-
 	dev_dbg(dev, "%s\n", __func__);
 
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_enable(iclk);
-		clk_put(iclk);
-	}
+	pm_runtime_clk_resume(dev);
+	return pm_generic_runtime_resume(dev);
+}
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_enable(fclk);
-		clk_put(fclk);
-	}
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = omap1_pm_runtime_suspend,
+		.runtime_resume = omap1_pm_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
 
-	return pm_generic_runtime_resume(dev);
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = &default_power_domain,
+	.con_ids = { "ick", "fck", NULL, },
 };
 
 static int __init omap1_pm_runtime_init(void)
 {
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
 	if (!cpu_class_is_omap1())
 		return -ENODEV;
 
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap1_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap1_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a45cd6409686..66dfbccacd25 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -59,16 +59,16 @@ endif
 # Power Management
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_ARCH_OMAP2)		+= pm24xx.o
-obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o pm_bus.o
+obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= pm34xx.o sleep34xx.o \
-					   cpuidle34xx.o pm_bus.o
-obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o pm_bus.o
+					   cpuidle34xx.o
+obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o
 obj-$(CONFIG_PM_DEBUG)			+= pm-debug.o
 obj-$(CONFIG_OMAP_SMARTREFLEX)          += sr_device.o smartreflex.o
 obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3)	+= smartreflex-class3.o
 
 AFLAGS_sleep24xx.o			:=-Wa,-march=armv6
-AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a
+AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a$(plus_sec)
 
 ifeq ($(CONFIG_PM_VERBOSE),y)
 CFLAGS_pm_bus.o				+= -DDEBUG
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index e964895b80e8..f8ba20a14e62 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -141,14 +141,19 @@ static void __init rx51_init(void)
 static void __init rx51_map_io(void)
 {
 	omap2_set_globals_3xxx();
-	rx51_video_mem_init();
 	omap34xx_map_common_io();
 }
 
+static void __init rx51_reserve(void)
+{
+	rx51_video_mem_init();
+	omap_reserve();
+}
+
 MACHINE_START(NOKIA_RX51, "Nokia RX-51 board")
 	/* Maintainer: Lauri Leukkunen <lauri.leukkunen@nokia.com> */
 	.boot_params	= 0x80000100,
-	.reserve	= omap_reserve,
+	.reserve	= rx51_reserve,
 	.map_io		= rx51_map_io,
 	.init_early	= rx51_init_early,
 	.init_irq	= omap_init_irq,
diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
index b2b1e37bb6bb..d6e34dd9e7e7 100644
--- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
+++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
@@ -115,6 +115,7 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 				  sdrc_cs0->rfr_ctrl, sdrc_cs0->actim_ctrla,
 				  sdrc_cs0->actim_ctrlb, sdrc_cs0->mr,
 				  0, 0, 0, 0);
+	clk->rate = rate;
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index 276992d3b7fb..8c965671b4d4 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3116,14 +3116,9 @@ static struct omap_clk omap44xx_clks[] = {
 	CLK(NULL,	"dsp_fck",			&dsp_fck,	CK_443X),
 	CLK("omapdss_dss",	"sys_clk",			&dss_sys_clk,	CK_443X),
 	CLK("omapdss_dss",	"tv_clk",			&dss_tv_clk,	CK_443X),
-	CLK("omapdss_dss",	"dss_clk",			&dss_dss_clk,	CK_443X),
 	CLK("omapdss_dss",	"video_clk",			&dss_48mhz_clk,	CK_443X),
-	CLK("omapdss_dss",	"fck",				&dss_fck,	CK_443X),
-	/*
-	 * On OMAP4, DSS ick is a dummy clock; this is needed for compatibility
-	 * with OMAP2/3.
-	 */
-	CLK("omapdss_dss",	"ick",				&dummy_ck,	CK_443X),
+	CLK("omapdss_dss",	"fck",				&dss_dss_clk,	CK_443X),
+	CLK("omapdss_dss",	"ick",				&dss_fck,	CK_443X),
 	CLK(NULL,	"efuse_ctrl_cust_fck",		&efuse_ctrl_cust_fck,	CK_443X),
 	CLK(NULL,	"emif1_fck",			&emif1_fck,	CK_443X),
 	CLK(NULL,	"emif2_fck",			&emif2_fck,	CK_443X),
diff --git a/arch/arm/mach-omap2/cm2xxx_3xxx.c b/arch/arm/mach-omap2/cm2xxx_3xxx.c
index 9d0dec806e92..38830d8d4783 100644
--- a/arch/arm/mach-omap2/cm2xxx_3xxx.c
+++ b/arch/arm/mach-omap2/cm2xxx_3xxx.c
@@ -247,6 +247,7 @@ struct omap3_cm_regs {
 	u32 per_cm_clksel;
 	u32 emu_cm_clksel;
 	u32 emu_cm_clkstctrl;
+	u32 pll_cm_autoidle;
 	u32 pll_cm_autoidle2;
 	u32 pll_cm_clksel4;
 	u32 pll_cm_clksel5;
@@ -319,6 +320,15 @@ void omap3_cm_save_context(void)
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, CM_CLKSEL1);
 	cm_context.emu_cm_clkstctrl =
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, even though this register has been saved in
+	 * scratchpad contents, we need to restore AUTO_PERIPH_DPLL
+	 * by ourselves. So, we need to save it anyway.
+	 */
+	cm_context.pll_cm_autoidle =
+		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE);
 	cm_context.pll_cm_autoidle2 =
 		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE2);
 	cm_context.pll_cm_clksel4 =
@@ -441,6 +451,13 @@ void omap3_cm_restore_context(void)
 			       CM_CLKSEL1);
 	omap2_cm_write_mod_reg(cm_context.emu_cm_clkstctrl, OMAP3430_EMU_MOD,
 			       OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, we need to restore AUTO_PERIPH_DPLL by ourselves.
+	 */
+	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle, PLL_MOD,
+			       CM_AUTOIDLE);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle2, PLL_MOD,
 			       CM_AUTOIDLE2);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_clksel4, PLL_MOD,
diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index 695279419020..da53ba3917ca 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -316,8 +316,14 @@ void omap3_save_scratchpad_contents(void)
 			omap2_cm_read_mod_reg(WKUP_MOD, CM_CLKSEL);
 	prcm_block_contents.cm_clken_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, CM_CLKEN);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * Then, in any case, clear these bits to avoid extra latencies.
+	 */
 	prcm_block_contents.cm_autoidle_pll =
-			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_AUTOIDLE_PLL);
+			omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE) &
+			~OMAP3430_AUTO_PERIPH_DPLL_MASK;
 	prcm_block_contents.cm_clksel1_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_CLKSEL1_PLL);
 	prcm_block_contents.cm_clksel2_pll =
diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index 8eb3ce1bbfbe..c4d0ae87d62a 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -1639,6 +1639,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1669,6 +1670,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1699,6 +1701,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1729,6 +1732,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
@@ -1782,7 +1786,7 @@ static struct omap_hwmod_irq_info omap2420_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2420_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index e6e3810db77f..9682dd519f8d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -1742,6 +1742,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1772,6 +1773,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1802,6 +1804,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1832,6 +1835,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
@@ -1862,6 +1866,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio5_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio5_irqs),
 	.main_clk	= "gpio5_fck",
@@ -1915,7 +1920,7 @@ static struct omap_hwmod_irq_info omap2430_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2430_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index b98e2dfcba28..909a84de6682 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2141,6 +2141,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio1_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio1_irqs),
 	.main_clk	= "gpio1_ick",
@@ -2177,6 +2178,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio2_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio2_irqs),
 	.main_clk	= "gpio2_ick",
@@ -2213,6 +2215,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio3_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio3_irqs),
 	.main_clk	= "gpio3_ick",
@@ -2249,6 +2252,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio4_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio4_irqs),
 	.main_clk	= "gpio4_ick",
@@ -2285,6 +2289,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio5_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio5_irqs),
 	.main_clk	= "gpio5_ick",
@@ -2321,6 +2326,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio6_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio6_hwmod = {
 	.name		= "gpio6",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio6_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio6_irqs),
 	.main_clk	= "gpio6_ick",
@@ -2386,7 +2392,7 @@ static struct omap_hwmod_irq_info omap3xxx_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap3xxx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 3e88dd3f8ef3..abc548a0c98d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -885,7 +885,7 @@ static struct omap_hwmod_ocp_if *omap44xx_dma_system_masters[] = {
 static struct omap_hwmod_addr_space omap44xx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x4a056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x4a056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c
index 5f2da7565b68..4321e7938929 100644
--- a/arch/arm/mach-omap2/omap_l3_smx.c
+++ b/arch/arm/mach-omap2/omap_l3_smx.c
@@ -196,11 +196,11 @@ static irqreturn_t omap3_l3_app_irq(int irq, void *_l3)
 		/* No timeout error for debug sources */
 	}
 
-	base = ((l3->rt) + (*(omap3_l3_bases[int_type] + err_source)));
-
 	/* identify the error source */
 	for (err_source = 0; !(status & (1 << err_source)); err_source++)
 									;
+
+	base = l3->rt + *(omap3_l3_bases[int_type] + err_source);
 	error = omap3_l3_readll(base, L3_ERROR_LOG);
 
 	if (error) {
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 30af3351c2d6..49486f522dca 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -89,6 +89,7 @@ static void omap2_init_processor_devices(void)
 	if (cpu_is_omap44xx()) {
 		_init_omap_device("l3_main_1", &l3_dev);
 		_init_omap_device("dsp", &dsp_dev);
+		_init_omap_device("iva", &iva_dev);
 	} else {
 		_init_omap_device("l3_main", &l3_dev);
 	}
diff --git a/arch/arm/mach-omap2/pm_bus.c b/arch/arm/mach-omap2/pm_bus.c
deleted file mode 100644
index 5acd2ab298b1..000000000000
--- a/arch/arm/mach-omap2/pm_bus.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Runtime PM support code for OMAP
- *
- * Author: Kevin Hilman, Deep Root Systems, LLC
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/pm_runtime.h>
-#include <linux/platform_device.h>
-#include <linux/mutex.h>
-
-#include <plat/omap_device.h>
-#include <plat/omap-pm.h>
-
-#ifdef CONFIG_PM_RUNTIME
-static int omap_pm_runtime_suspend(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r, ret = 0;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	ret = pm_generic_runtime_suspend(dev);
-
-	if (!ret && dev->parent == &omap_device_parent) {
-		r = omap_device_idle(pdev);
-		WARN_ON(r);
-	}
-
-	return ret;
-};
-
-static int omap_pm_runtime_resume(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	if (dev->parent == &omap_device_parent) {
-		r = omap_device_enable(pdev);
-		WARN_ON(r);
-	}
-
-	return pm_generic_runtime_resume(dev);
-};
-#else
-#define omap_pm_runtime_suspend NULL
-#define omap_pm_runtime_resume NULL
-#endif /* CONFIG_PM_RUNTIME */
-
-static int __init omap_pm_runtime_init(void)
-{
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
-
-	return 0;
-}
-core_initcall(omap_pm_runtime_init);
diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c
index 6fb520999b6e..0c1552d9d995 100644
--- a/arch/arm/mach-omap2/voltage.c
+++ b/arch/arm/mach-omap2/voltage.c
@@ -114,7 +114,6 @@ static int __init _config_common_vdd_data(struct omap_vdd_info *vdd)
 	sys_clk_speed /= 1000;
 
 	/* Generic voltage parameters */
-	vdd->curr_volt = 1200000;
 	vdd->volt_scale = vp_forceupdate_scale_voltage;
 	vdd->vp_enabled = false;
 
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index bfbecec6d05f..810a982a66f8 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -15,7 +15,6 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/clock-pxa2xx.c b/arch/arm/mach-pxa/clock-pxa2xx.c
index 1ce090448493..1d5859d9a0e3 100644
--- a/arch/arm/mach-pxa/clock-pxa2xx.c
+++ b/arch/arm/mach-pxa/clock-pxa2xx.c
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa2xx-regs.h>
 
@@ -33,32 +33,22 @@ const struct clkops clk_pxa2xx_cken_ops = {
 #ifdef CONFIG_PM
 static uint32_t saved_cken;
 
-static int pxa2xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_clock_suspend(void)
 {
 	saved_cken = CKEN;
 	return 0;
 }
 
-static int pxa2xx_clock_resume(struct sys_device *d)
+static void pxa2xx_clock_resume(void)
 {
 	CKEN = saved_cken;
-	return 0;
 }
 #else
 #define pxa2xx_clock_suspend	NULL
 #define pxa2xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_clock_sysclass = {
-	.name		= "pxa2xx-clock",
+struct syscore_ops pxa2xx_clock_syscore_ops = {
 	.suspend	= pxa2xx_clock_suspend,
 	.resume		= pxa2xx_clock_resume,
 };
-
-static int __init pxa2xx_clock_init(void)
-{
-	if (cpu_is_pxa2xx())
-		return sysdev_class_register(&pxa2xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa2xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock-pxa3xx.c b/arch/arm/mach-pxa/clock-pxa3xx.c
index 3f864cd0bd28..2a37a9a8f621 100644
--- a/arch/arm/mach-pxa/clock-pxa3xx.c
+++ b/arch/arm/mach-pxa/clock-pxa3xx.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/smemc.h>
 #include <mach/pxa3xx-regs.h>
@@ -182,7 +183,7 @@ const struct clkops clk_pxa3xx_pout_ops = {
 static uint32_t cken[2];
 static uint32_t accr;
 
-static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_clock_suspend(void)
 {
 	cken[0] = CKENA;
 	cken[1] = CKENB;
@@ -190,28 +191,18 @@ static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_clock_resume(struct sys_device *d)
+static void pxa3xx_clock_resume(void)
 {
 	ACCR = accr;
 	CKENA = cken[0];
 	CKENB = cken[1];
-	return 0;
 }
 #else
 #define pxa3xx_clock_suspend	NULL
 #define pxa3xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_clock_sysclass = {
-	.name		= "pxa3xx-clock",
+struct syscore_ops pxa3xx_clock_syscore_ops = {
 	.suspend	= pxa3xx_clock_suspend,
 	.resume		= pxa3xx_clock_resume,
 };
-
-static int __init pxa3xx_clock_init(void)
-{
-	if (cpu_is_pxa3xx() || cpu_is_pxa95x())
-		return sysdev_class_register(&pxa3xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa3xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index f9f349a21b54..1f2fb9c43f06 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -1,5 +1,5 @@
 #include <linux/clkdev.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
@@ -54,7 +54,7 @@ extern const struct clkops clk_pxa2xx_cken_ops;
 void clk_pxa2xx_cken_enable(struct clk *clk);
 void clk_pxa2xx_cken_disable(struct clk *clk);
 
-extern struct sysdev_class pxa2xx_clock_sysclass;
+extern struct syscore_ops pxa2xx_clock_syscore_ops;
 
 #if defined(CONFIG_PXA3xx) || defined(CONFIG_PXA95x)
 #define DEFINE_PXA3_CKEN(_name, _cken, _rate, _delay)	\
@@ -74,5 +74,6 @@ extern const struct clkops clk_pxa3xx_smemc_ops;
 extern void clk_pxa3xx_cken_enable(struct clk *);
 extern void clk_pxa3xx_cken_disable(struct clk *);
 
-extern struct sysdev_class pxa3xx_clock_sysclass;
+extern struct syscore_ops pxa3xx_clock_syscore_ops;
+
 #endif
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index b88d601a8090..13518a705399 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index 8225e2e58c6e..a10996782476 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 
@@ -388,7 +388,7 @@ static inline void cmx2xx_init_display(void) {}
 #ifdef CONFIG_PM
 static unsigned long sleep_save_msc[10];
 
-static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
+static int cmx2xx_suspend(void)
 {
 	cmx2xx_pci_suspend();
 
@@ -412,7 +412,7 @@ static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int cmx2xx_resume(struct sys_device *dev)
+static void cmx2xx_resume(void)
 {
 	cmx2xx_pci_resume();
 
@@ -420,27 +420,18 @@ static int cmx2xx_resume(struct sys_device *dev)
 	__raw_writel(sleep_save_msc[0], MSC0);
 	__raw_writel(sleep_save_msc[1], MSC1);
 	__raw_writel(sleep_save_msc[2], MSC2);
-
-	return 0;
 }
 
-static struct sysdev_class cmx2xx_pm_sysclass = {
-	.name = "pm",
+static struct syscore_ops cmx2xx_pm_syscore_ops = {
 	.resume = cmx2xx_resume,
 	.suspend = cmx2xx_suspend,
 };
 
-static struct sys_device cmx2xx_pm_device = {
-	.cls = &cmx2xx_pm_sysclass,
-};
-
 static int __init cmx2xx_pm_init(void)
 {
-	int error;
-	error = sysdev_class_register(&cmx2xx_pm_sysclass);
-	if (error == 0)
-		error = sysdev_register(&cmx2xx_pm_device);
-	return error;
+	register_syscore_ops(&cmx2xx_pm_syscore_ops);
+
+	return 0;
 }
 #else
 static int __init cmx2xx_pm_init(void) { return 0; }
diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c
index 81c3c433e2d6..d28e802e2448 100644
--- a/arch/arm/mach-pxa/colibri-evalboard.c
+++ b/arch/arm/mach-pxa/colibri-evalboard.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c
index 44c1b77ece67..80538b8806ed 100644
--- a/arch/arm/mach-pxa/colibri-pxa270-income.c
+++ b/arch/arm/mach-pxa/colibri-pxa270-income.c
@@ -22,7 +22,6 @@
 #include <linux/platform_device.h>
 #include <linux/pwm_backlight.h>
 #include <linux/i2c/pxa-i2c.h>
-#include <linux/sysdev.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index 6fc5d328ba7f..7545a48ed88b 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -17,7 +17,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/ucb1400.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h
index a079d8baa45a..e6c9344a95ae 100644
--- a/arch/arm/mach-pxa/generic.h
+++ b/arch/arm/mach-pxa/generic.h
@@ -61,10 +61,10 @@ extern unsigned pxa3xx_get_clk_frequency_khz(int);
 #define pxa3xx_get_clk_frequency_khz(x)		(0)
 #endif
 
-extern struct sysdev_class pxa_irq_sysclass;
-extern struct sysdev_class pxa_gpio_sysclass;
-extern struct sysdev_class pxa2xx_mfp_sysclass;
-extern struct sysdev_class pxa3xx_mfp_sysclass;
+extern struct syscore_ops pxa_irq_syscore_ops;
+extern struct syscore_ops pxa_gpio_syscore_ops;
+extern struct syscore_ops pxa2xx_mfp_syscore_ops;
+extern struct syscore_ops pxa3xx_mfp_syscore_ops;
 
 void __init pxa_set_ffuart_info(void *info);
 void __init pxa_set_btuart_info(void *info);
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 6de0ad0eea65..9cdcca597924 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -711,7 +711,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT|REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
diff --git a/arch/arm/mach-pxa/include/mach/gpio.h b/arch/arm/mach-pxa/include/mach/gpio.h
index b024a8b37439..c4639502efca 100644
--- a/arch/arm/mach-pxa/include/mach/gpio.h
+++ b/arch/arm/mach-pxa/include/mach/gpio.h
@@ -99,11 +99,24 @@
 #define GAFR(x)		GPIO_REG(0x54 + (((x) & 0x70) >> 2))
 
 
-#define NR_BUILTIN_GPIO 128
+#define NR_BUILTIN_GPIO		PXA_GPIO_IRQ_NUM
 
 #define gpio_to_bank(gpio)	((gpio) >> 5)
 #define gpio_to_irq(gpio)	IRQ_GPIO(gpio)
-#define irq_to_gpio(irq)	IRQ_TO_GPIO(irq)
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	int gpio;
+
+	if (irq == IRQ_GPIO0 || irq == IRQ_GPIO1)
+		return irq - IRQ_GPIO0;
+
+	gpio = irq - PXA_GPIO_IRQ_BASE;
+	if (gpio >= 2 && gpio < NR_BUILTIN_GPIO)
+		return gpio;
+
+	return -1;
+}
 
 #ifdef CONFIG_CPU_PXA26x
 /* GPIO86/87/88/89 on PXA26x have their direction bits in GPDR2 inverted,
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index a4285fc00878..038402404e39 100644
--- a/arch/arm/mach-pxa/include/mach/irqs.h
+++ b/arch/arm/mach-pxa/include/mach/irqs.h
@@ -93,9 +93,6 @@
 #define GPIO_2_x_TO_IRQ(x)	(PXA_GPIO_IRQ_BASE + (x))
 #define IRQ_GPIO(x)	(((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x))
 
-#define IRQ_TO_GPIO_2_x(i)	((i) - PXA_GPIO_IRQ_BASE)
-#define IRQ_TO_GPIO(i)	(((i) < IRQ_GPIO(2)) ? ((i) - IRQ_GPIO0) : IRQ_TO_GPIO_2_x(i))
-
 /*
  * The following interrupts are for board specific purposes. Since
  * the kernel can only run on one machine at a time, we can re-use
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 6251e3f5c62c..32ed551bf9c5 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 
@@ -183,7 +183,7 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
 static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
 
-static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_irq_suspend(void)
 {
 	int i;
 
@@ -202,7 +202,7 @@ static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_irq_resume(struct sys_device *dev)
+static void pxa_irq_resume(void)
 {
 	int i;
 
@@ -218,22 +218,13 @@ static int pxa_irq_resume(struct sys_device *dev)
 			__raw_writel(saved_ipr[i], IRQ_BASE + IPR(i));
 
 	__raw_writel(1, IRQ_BASE + ICCR);
-	return 0;
 }
 #else
 #define pxa_irq_suspend		NULL
 #define pxa_irq_resume		NULL
 #endif
 
-struct sysdev_class pxa_irq_sysclass = {
-	.name		= "irq",
+struct syscore_ops pxa_irq_syscore_ops = {
 	.suspend	= pxa_irq_suspend,
 	.resume		= pxa_irq_resume,
 };
-
-static int __init pxa_irq_init(void)
-{
-	return sysdev_class_register(&pxa_irq_sysclass);
-}
-
-core_initcall(pxa_irq_init);
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index f5de541725b1..6cf8180bf5bd 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -159,30 +159,22 @@ static void __init lpd270_init_irq(void)
 
 
 #ifdef CONFIG_PM
-static int lpd270_irq_resume(struct sys_device *dev)
+static void lpd270_irq_resume(void)
 {
 	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
-	return 0;
 }
 
-static struct sysdev_class lpd270_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lpd270_irq_syscore_ops = {
 	.resume = lpd270_irq_resume,
 };
 
-static struct sys_device lpd270_irq_device = {
-	.cls = &lpd270_irq_sysclass,
-};
-
 static int __init lpd270_irq_device_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_logicpd_pxa270()) {
-		ret = sysdev_class_register(&lpd270_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lpd270_irq_device);
+		register_syscore_ops(&lpd270_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lpd270_irq_device_init);
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 3ede978c83d9..e10ddb827147 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/major.h>
 #include <linux/fb.h>
 #include <linux/interrupt.h>
@@ -176,31 +176,22 @@ static void __init lubbock_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int lubbock_irq_resume(struct sys_device *dev)
+static void lubbock_irq_resume(void)
 {
 	LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class lubbock_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lubbock_irq_syscore_ops = {
 	.resume = lubbock_irq_resume,
 };
 
-static struct sys_device lubbock_irq_device = {
-	.cls = &lubbock_irq_sysclass,
-};
-
 static int __init lubbock_irq_device_init(void)
 {
-	int ret = -ENODEV;
-
 	if (machine_is_lubbock()) {
-		ret = sysdev_class_register(&lubbock_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lubbock_irq_device);
+		register_syscore_ops(&lubbock_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lubbock_irq_device_init);
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index a72993dde2b3..9984ef70bd79 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -599,7 +599,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT | REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 95163baca29e..3479e2b3b511 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -185,31 +185,21 @@ static void __init mainstone_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int mainstone_irq_resume(struct sys_device *dev)
+static void mainstone_irq_resume(void)
 {
 	MST_INTMSKENA = mainstone_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class mainstone_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops mainstone_irq_syscore_ops = {
 	.resume = mainstone_irq_resume,
 };
 
-static struct sys_device mainstone_irq_device = {
-	.cls = &mainstone_irq_sysclass,
-};
-
 static int __init mainstone_irq_device_init(void)
 {
-	int ret = -ENODEV;
+	if (machine_is_mainstone())
+		register_syscore_ops(&mainstone_irq_syscore_ops);
 
-	if (machine_is_mainstone()) {
-		ret = sysdev_class_register(&mainstone_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&mainstone_irq_device);
-	}
-	return ret;
+	return 0;
 }
 
 device_initcall(mainstone_irq_device_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 1d1419b73457..87ae3129f4f7 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/gpio.h>
 #include <mach/pxa2xx-regs.h>
@@ -338,7 +338,7 @@ static unsigned long saved_gafr[2][4];
 static unsigned long saved_gpdr[4];
 static unsigned long saved_pgsr[4];
 
-static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_mfp_suspend(void)
 {
 	int i;
 
@@ -365,7 +365,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa2xx_mfp_resume(struct sys_device *d)
+static void pxa2xx_mfp_resume(void)
 {
 	int i;
 
@@ -376,15 +376,13 @@ static int pxa2xx_mfp_resume(struct sys_device *d)
 		PGSR(i) = saved_pgsr[i];
 	}
 	PSSR = PSSR_RDH | PSSR_PH;
-	return 0;
 }
 #else
 #define pxa2xx_mfp_suspend	NULL
 #define pxa2xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa2xx_mfp_syscore_ops = {
 	.suspend	= pxa2xx_mfp_suspend,
 	.resume		= pxa2xx_mfp_resume,
 };
@@ -409,6 +407,6 @@ static int __init pxa2xx_mfp_init(void)
 	for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++)
 		gpdr_lpm[i] = GPDR(i * 32);
 
-	return sysdev_class_register(&pxa2xx_mfp_sysclass);
+	return 0;
 }
 postcore_initcall(pxa2xx_mfp_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa3xx.c b/arch/arm/mach-pxa/mfp-pxa3xx.c
index 7a270eecd480..89863a01ecd7 100644
--- a/arch/arm/mach-pxa/mfp-pxa3xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa3xx.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/mfp-pxa3xx.h>
@@ -31,13 +31,13 @@
  * a pull-down mode if they're an active low chip select, and we're
  * just entering standby.
  */
-static int pxa3xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_mfp_suspend(void)
 {
 	mfp_config_lpm();
 	return 0;
 }
 
-static int pxa3xx_mfp_resume(struct sys_device *d)
+static void pxa3xx_mfp_resume(void)
 {
 	mfp_config_run();
 
@@ -47,24 +47,13 @@ static int pxa3xx_mfp_resume(struct sys_device *d)
 	 * preserve them here in case they will be referenced later
 	 */
 	ASCR &= ~(ASCR_RDH | ASCR_D1S | ASCR_D2S | ASCR_D3S);
-	return 0;
 }
 #else
 #define pxa3xx_mfp_suspend	NULL
 #define pxa3xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa3xx_mfp_syscore_ops = {
 	.suspend	= pxa3xx_mfp_suspend,
-	.resume 	= pxa3xx_mfp_resume,
+	.resume		= pxa3xx_mfp_resume,
 };
-
-static int __init mfp_init_devicefs(void)
-{
-	if (cpu_is_pxa3xx())
-		return sysdev_class_register(&pxa3xx_mfp_sysclass);
-
-	return 0;
-}
-postcore_initcall(mfp_init_devicefs);
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 23925db8ff74..e3470137c934 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/input.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
@@ -488,7 +488,7 @@ static void install_bootstrap(void)
 }
 
 
-static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+static int mioa701_sys_suspend(void)
 {
 	int i = 0, is_bt_on;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -514,7 +514,7 @@ static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mioa701_sys_resume(struct sys_device *sysdev)
+static void mioa701_sys_resume(void)
 {
 	int i = 0;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -527,43 +527,18 @@ static int mioa701_sys_resume(struct sys_device *sysdev)
 	*mem_resume_enabler = save_buffer[i++];
 	*mem_resume_bt	    = save_buffer[i++];
 	*mem_resume_unknown = save_buffer[i++];
-
-	return 0;
 }
 
-static struct sysdev_class mioa701_sysclass = {
-	.name = "mioa701",
-};
-
-static struct sys_device sysdev_bootstrap = {
-	.cls		= &mioa701_sysclass,
-};
-
-static struct sysdev_driver driver_bootstrap = {
-	.suspend	= &mioa701_sys_suspend,
-	.resume		= &mioa701_sys_resume,
+static struct syscore_ops mioa701_syscore_ops = {
+	.suspend	= mioa701_sys_suspend,
+	.resume		= mioa701_sys_resume,
 };
 
 static int __init bootstrap_init(void)
 {
-	int rc;
 	int save_size = mioa701_bootstrap_lg + (sizeof(u32) * 3);
 
-	rc = sysdev_class_register(&mioa701_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&sysdev_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys device\n");
-		return -ENODEV;
-	}
-	rc = sysdev_driver_register(&mioa701_sysclass, &driver_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys driver\n");
-		return -ENODEV;
-	}
+	register_syscore_ops(&mioa701_syscore_ops);
 
 	save_buffer = kmalloc(save_size, GFP_KERNEL);
 	if (!save_buffer)
@@ -576,9 +551,7 @@ static int __init bootstrap_init(void)
 static void bootstrap_exit(void)
 {
 	kfree(save_buffer);
-	sysdev_driver_unregister(&mioa701_sysclass, &driver_bootstrap);
-	sysdev_unregister(&sysdev_bootstrap);
-	sysdev_class_unregister(&mioa701_sysclass);
+	unregister_syscore_ops(&mioa701_syscore_ops);
 
 	printk(KERN_CRIT "Unregistering mioa701 suspend will hang next"
 	       "resume !!!\n");
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index a6f898cbfac9..4061ecddee70 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -24,7 +24,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index 8aadad55fbe4..20d1b18b1733 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -25,7 +25,6 @@
 #include <linux/pwm_backlight.h>
 #include <linux/gpio.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/w1-gpio.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 3b8a4f37dbbe..65f24f0b77e8 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/gpio_keys.h>
@@ -233,9 +233,9 @@ static struct palmz72_resume_info palmz72_resume_info = {
 
 static unsigned long store_ptr;
 
-/* sys_device for Palm Zire 72 PM */
+/* syscore_ops for Palm Zire 72 PM */
 
-static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
+static int palmz72_pm_suspend(void)
 {
 	/* setup the resume_info struct for the original bootloader */
 	palmz72_resume_info.resume_addr = (u32) cpu_resume;
@@ -249,31 +249,23 @@ static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
 	return 0;
 }
 
-static int palmz72_pm_resume(struct sys_device *dev)
+static void palmz72_pm_resume(void)
 {
 	*PALMZ72_SAVE_DWORD = store_ptr;
-	return 0;
 }
 
-static struct sysdev_class palmz72_pm_sysclass = {
-	.name = "palmz72_pm",
+static struct syscore_ops palmz72_pm_syscore_ops = {
 	.suspend = palmz72_pm_suspend,
 	.resume = palmz72_pm_resume,
 };
 
-static struct sys_device palmz72_pm_device = {
-	.cls = &palmz72_pm_sysclass,
-};
-
 static int __init palmz72_pm_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_palmz72()) {
-		ret = sysdev_class_register(&palmz72_pm_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&palmz72_pm_device);
+		register_syscore_ops(&palmz72_pm_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(palmz72_pm_init);
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index 6bde5956358d..fed363cec9c6 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 
 #include <asm/mach/map.h>
@@ -285,7 +285,7 @@ static inline void pxa25x_init_pm(void) {}
 
 static int pxa25x_set_wake(struct irq_data *d, unsigned int on)
 {
-	int gpio = IRQ_TO_GPIO(d->irq);
+	int gpio = irq_to_gpio(d->irq);
 	uint32_t mask = 0;
 
 	if (gpio >= 0 && gpio < 85)
@@ -350,21 +350,9 @@ static struct platform_device *pxa25x_devices[] __initdata = {
 	&pxa_device_asoc_platform,
 };
 
-static struct sys_device pxa25x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa25x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa25x()) {
 
@@ -377,11 +365,10 @@ static int __init pxa25x_init(void)
 
 		pxa25x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa25x_sysdev); i++) {
-			ret = sysdev_register(&pxa25x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(pxa25x_devices,
 					   ARRAY_SIZE(pxa25x_devices));
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 1cb5d0f9723f..2fecbec58d88 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/i2c/pxa-i2c.h>
@@ -345,7 +345,7 @@ static inline void pxa27x_init_pm(void) {}
  */
 static int pxa27x_set_wake(struct irq_data *d, unsigned int on)
 {
-	int gpio = IRQ_TO_GPIO(d->irq);
+	int gpio = irq_to_gpio(d->irq);
 	uint32_t mask;
 
 	if (gpio >= 0 && gpio < 128)
@@ -428,21 +428,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa27x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa27x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa27x()) {
 
@@ -455,11 +443,10 @@ static int __init pxa27x_init(void)
 
 		pxa27x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa27x_sysdev); i++) {
-			ret = sysdev_register(&pxa27x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 8dd107391157..8521d7d6f1da 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -20,7 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/mach/map.h>
@@ -427,21 +427,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa3xx_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa3xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa3xx_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa3xx()) {
 
@@ -462,11 +450,10 @@ static int __init pxa3xx_init(void)
 
 		pxa3xx_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa3xx_sysdev); i++) {
-			ret = sysdev_register(&pxa3xx_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa3xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa95x.c b/arch/arm/mach-pxa/pxa95x.c
index 23b229bd06e9..ecc82a330fad 100644
--- a/arch/arm/mach-pxa/pxa95x.c
+++ b/arch/arm/mach-pxa/pxa95x.c
@@ -18,7 +18,7 @@
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/gpio.h>
@@ -260,16 +260,6 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa95x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa95x_init(void)
 {
 	int ret = 0, i;
@@ -293,11 +283,9 @@ static int __init pxa95x_init(void)
 		if ((ret = pxa_init_dma(IRQ_DMA, 32)))
 			return ret;
 
-		for (i = 0; i < ARRAY_SIZE(pxa95x_sysdev); i++) {
-			ret = sysdev_register(&pxa95x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index cd1861351f75..d130f77b6d11 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -18,7 +18,6 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c
index 232b7316ec08..79923058d10f 100644
--- a/arch/arm/mach-pxa/smemc.c
+++ b/arch/arm/mach-pxa/smemc.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/smemc.h>
@@ -16,7 +16,7 @@ static unsigned long msc[2];
 static unsigned long sxcnfg, memclkcfg;
 static unsigned long csadrcfg[4];
 
-static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa3xx_smemc_suspend(void)
 {
 	msc[0] = __raw_readl(MSC0);
 	msc[1] = __raw_readl(MSC1);
@@ -30,7 +30,7 @@ static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_smemc_resume(struct sys_device *dev)
+static void pxa3xx_smemc_resume(void)
 {
 	__raw_writel(msc[0], MSC0);
 	__raw_writel(msc[1], MSC1);
@@ -40,34 +40,19 @@ static int pxa3xx_smemc_resume(struct sys_device *dev)
 	__raw_writel(csadrcfg[1], CSADRCFG1);
 	__raw_writel(csadrcfg[2], CSADRCFG2);
 	__raw_writel(csadrcfg[3], CSADRCFG3);
-
-	return 0;
 }
 
-static struct sysdev_class smemc_sysclass = {
-	.name		= "smemc",
+static struct syscore_ops smemc_syscore_ops = {
 	.suspend	= pxa3xx_smemc_suspend,
 	.resume		= pxa3xx_smemc_resume,
 };
 
-static struct sys_device smemc_sysdev = {
-	.id		= 0,
-	.cls		= &smemc_sysclass,
-};
-
 static int __init smemc_init(void)
 {
-	int ret = 0;
+	if (cpu_is_pxa3xx())
+		register_syscore_ops(&smemc_syscore_ops);
 
-	if (cpu_is_pxa3xx()) {
-		ret = sysdev_class_register(&smemc_sysclass);
-		if (ret)
-			return ret;
-
-		ret = sysdev_register(&smemc_sysdev);
-	}
-
-	return ret;
+	return 0;
 }
 subsys_initcall(smemc_init);
 #endif
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index b9cfbebdfe9c..687417a93698 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index b523f119e0f0..903218eab56d 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -44,6 +44,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa25x.h>
 #include <mach/audio.h>
@@ -130,20 +131,19 @@ static u8 viper_hw_version(void)
 	return v1;
 }
 
-/* CPU sysdev */
-static int viper_cpu_suspend(struct sys_device *sysdev, pm_message_t state)
+/* CPU system core operations. */
+static int viper_cpu_suspend(void)
 {
 	viper_icr_set_bit(VIPER_ICR_R_DIS);
 	return 0;
 }
 
-static int viper_cpu_resume(struct sys_device *sysdev)
+static void viper_cpu_resume(void)
 {
 	viper_icr_clear_bit(VIPER_ICR_R_DIS);
-	return 0;
 }
 
-static struct sysdev_driver viper_cpu_sysdev_driver = {
+static struct syscore_ops viper_cpu_syscore_ops = {
 	.suspend	= viper_cpu_suspend,
 	.resume		= viper_cpu_resume,
 };
@@ -945,7 +945,7 @@ static void __init viper_init(void)
 	viper_init_vcore_gpios();
 	viper_init_cpufreq();
 
-	sysdev_driver_register(&cpu_sysdev_class, &viper_cpu_sysdev_driver);
+	register_syscore_ops(&viper_cpu_syscore_ops);
 
 	if (version) {
 		pr_info("viper: hardware v%di%d detected. "
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index f71d377c8640..67bd41488bf8 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -16,7 +16,6 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
 #include <linux/usb/gpio_vbus.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-realview/include/mach/barriers.h b/arch/arm/mach-realview/include/mach/barriers.h
index 0c5d749d7b5f..9a732195aa1c 100644
--- a/arch/arm/mach-realview/include/mach/barriers.h
+++ b/arch/arm/mach-realview/include/mach/barriers.h
@@ -4,5 +4,5 @@
  * operation to deadlock the system.
  */
 #define mb()		dsb()
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
diff --git a/arch/arm/mach-s3c2410/irq.c b/arch/arm/mach-s3c2410/irq.c
index 5e2f35332056..2854129f8cc7 100644
--- a/arch/arm/mach-s3c2410/irq.c
+++ b/arch/arm/mach-s3c2410/irq.c
@@ -23,38 +23,12 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <plat/cpu.h>
 #include <plat/pm.h>
 
-static int s3c2410_irq_add(struct sys_device *sysdev)
-{
-	return 0;
-}
-
-static struct sysdev_driver s3c2410_irq_driver = {
-	.add		= s3c2410_irq_add,
+struct syscore_ops s3c24xx_irq_syscore_ops = {
 	.suspend	= s3c24xx_irq_suspend,
 	.resume		= s3c24xx_irq_resume,
 };
-
-static int __init s3c2410_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410_sysclass, &s3c2410_irq_driver);
-}
-
-arch_initcall(s3c2410_irq_init);
-
-static struct sysdev_driver s3c2410a_irq_driver = {
-	.add		= s3c2410_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
-};
-
-static int __init s3c2410a_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410a_sysclass, &s3c2410a_irq_driver);
-}
-
-arch_initcall(s3c2410a_irq_init);
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 2970ea9f7c2b..1e2d536adda9 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/dm9000.h>
@@ -214,17 +214,16 @@ static struct s3c2410_uartcfg bast_uartcfgs[] __initdata = {
 /* NAND Flash on BAST board */
 
 #ifdef CONFIG_PM
-static int bast_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int bast_pm_suspend(void)
 {
 	/* ensure that an nRESET is not generated on resume. */
 	gpio_direction_output(S3C2410_GPA(21), 1);
 	return 0;
 }
 
-static int bast_pm_resume(struct sys_device *sd)
+static void bast_pm_resume(void)
 {
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-	return 0;
 }
 
 #else
@@ -232,16 +231,11 @@ static int bast_pm_resume(struct sys_device *sd)
 #define bast_pm_resume NULL
 #endif
 
-static struct sysdev_class bast_pm_sysclass = {
-	.name		= "mach-bast",
+static struct syscore_ops bast_pm_syscore_ops = {
 	.suspend	= bast_pm_suspend,
 	.resume		= bast_pm_resume,
 };
 
-static struct sys_device bast_pm_sysdev = {
-	.cls		= &bast_pm_sysclass,
-};
-
 static int smartmedia_map[] = { 0 };
 static int chip0_map[] = { 1 };
 static int chip1_map[] = { 2 };
@@ -642,8 +636,7 @@ static void __init bast_map_io(void)
 
 static void __init bast_init(void)
 {
-	sysdev_class_register(&bast_pm_sysclass);
-	sysdev_register(&bast_pm_sysdev);
+	register_syscore_ops(&bast_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(&bast_i2c_info);
 	s3c_nand_set_platdata(&bast_nand_info);
diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c
index 725636fc4dc3..4728f9aa7df1 100644
--- a/arch/arm/mach-s3c2410/pm.c
+++ b/arch/arm/mach-s3c2410/pm.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
 
@@ -92,7 +93,7 @@ static void s3c2410_pm_prepare(void)
 	}
 }
 
-static int s3c2410_pm_resume(struct sys_device *dev)
+static void s3c2410_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -104,10 +105,12 @@ static int s3c2410_pm_resume(struct sys_device *dev)
 
 	if ( machine_is_aml_m5900() )
 		s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
-
-	return 0;
 }
 
+struct syscore_ops s3c2410_pm_syscore_ops = {
+	.resume		= s3c2410_pm_resume,
+};
+
 static int s3c2410_pm_add(struct sys_device *dev)
 {
 	pm_cpu_prep = s3c2410_pm_prepare;
@@ -119,7 +122,6 @@ static int s3c2410_pm_add(struct sys_device *dev)
 #if defined(CONFIG_CPU_S3C2410)
 static struct sysdev_driver s3c2410_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 /* register ourselves */
@@ -133,7 +135,6 @@ arch_initcall(s3c2410_pm_drvinit);
 
 static struct sysdev_driver s3c2410a_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2410a_pm_drvinit(void)
@@ -147,7 +148,6 @@ arch_initcall(s3c2410a_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2440)
 static struct sysdev_driver s3c2440_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2440_pm_drvinit(void)
@@ -161,7 +161,6 @@ arch_initcall(s3c2440_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2442)
 static struct sysdev_driver s3c2442_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2442_pm_drvinit(void)
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index adc90a3c5890..f1d3bd8f6f17 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -19,6 +19,7 @@
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -40,6 +41,7 @@
 #include <plat/devs.h>
 #include <plat/clock.h>
 #include <plat/pll.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -168,6 +170,9 @@ int __init s3c2410_init(void)
 {
 	printk("S3C2410: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2410_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2412/irq.c b/arch/arm/mach-s3c2412/irq.c
index f3355d2ec634..1a1aa220972b 100644
--- a/arch/arm/mach-s3c2412/irq.c
+++ b/arch/arm/mach-s3c2412/irq.c
@@ -202,8 +202,6 @@ static int s3c2412_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2412_irq_driver = {
 	.add		= s3c2412_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2412_irq_init(void)
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 923e01bdf017..85dcaeb9e62f 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
@@ -486,7 +486,7 @@ static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = {
 /* Jive power management device */
 
 #ifdef CONFIG_PM
-static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int jive_pm_suspend(void)
 {
 	/* Write the magic value u-boot uses to check for resume into
 	 * the INFORM0 register, and ensure INFORM1 is set to the
@@ -498,10 +498,9 @@ static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int jive_pm_resume(struct sys_device *sd)
+static void jive_pm_resume(void)
 {
 	__raw_writel(0x0, S3C2412_INFORM0);
-	return 0;
 }
 
 #else
@@ -509,16 +508,11 @@ static int jive_pm_resume(struct sys_device *sd)
 #define jive_pm_resume NULL
 #endif
 
-static struct sysdev_class jive_pm_sysclass = {
-	.name		= "jive-pm",
+static struct syscore_ops jive_pm_syscore_ops = {
 	.suspend	= jive_pm_suspend,
 	.resume		= jive_pm_resume,
 };
 
-static struct sys_device jive_pm_sysdev = {
-	.cls		= &jive_pm_sysclass,
-};
-
 static void __init jive_map_io(void)
 {
 	s3c24xx_init_io(jive_iodesc, ARRAY_SIZE(jive_iodesc));
@@ -536,10 +530,9 @@ static void jive_power_off(void)
 
 static void __init jive_machine_init(void)
 {
-	/* register system devices for managing low level suspend */
+	/* register system core operations for managing low level suspend */
 
-	sysdev_class_register(&jive_pm_sysclass);
-	sysdev_register(&jive_pm_sysdev);
+	register_syscore_ops(&jive_pm_syscore_ops);
 
 	/* write our sleep configurations for the IO. Pull down all unused
 	 * IO, ensure that we have turned off all peripherals we do not
diff --git a/arch/arm/mach-s3c2412/pm.c b/arch/arm/mach-s3c2412/pm.c
index a7417c479ffe..752b13a7b3db 100644
--- a/arch/arm/mach-s3c2412/pm.c
+++ b/arch/arm/mach-s3c2412/pm.c
@@ -17,6 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
@@ -86,13 +87,24 @@ static struct sleep_save s3c2412_sleep[] = {
 	SAVE_ITEM(S3C2413_GPJSLPCON),
 };
 
-static int s3c2412_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2412_pm_driver = {
+	.add		= s3c2412_pm_add,
+};
+
+static __init int s3c2412_pm_init(void)
+{
+	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
+}
+
+arch_initcall(s3c2412_pm_init);
+
+static int s3c2412_pm_suspend(void)
 {
 	s3c_pm_do_save(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
 	return 0;
 }
 
-static int s3c2412_pm_resume(struct sys_device *dev)
+static void s3c2412_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -102,18 +114,9 @@ static int s3c2412_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp, S3C2412_PWRCFG);
 
 	s3c_pm_do_restore(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
-	return 0;
 }
 
-static struct sysdev_driver s3c2412_pm_driver = {
-	.add		= s3c2412_pm_add,
+struct syscore_ops s3c2412_pm_syscore_ops = {
 	.suspend	= s3c2412_pm_suspend,
 	.resume		= s3c2412_pm_resume,
 };
-
-static __init int s3c2412_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
-}
-
-arch_initcall(s3c2412_pm_init);
diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c
index 4c6df51ddf33..ef0958d3e5c6 100644
--- a/arch/arm/mach-s3c2412/s3c2412.c
+++ b/arch/arm/mach-s3c2412/s3c2412.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -244,5 +245,8 @@ int __init s3c2412_init(void)
 {
 	printk("S3C2412: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2412_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2412_sysdev);
 }
diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c
index 77b38f2381c1..28ad20d42445 100644
--- a/arch/arm/mach-s3c2416/irq.c
+++ b/arch/arm/mach-s3c2416/irq.c
@@ -236,8 +236,6 @@ static int __init s3c2416_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2416_irq_driver = {
 	.add		= s3c2416_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int __init s3c2416_irq_init(void)
diff --git a/arch/arm/mach-s3c2416/pm.c b/arch/arm/mach-s3c2416/pm.c
index 4a04205b04d5..41db2b21e213 100644
--- a/arch/arm/mach-s3c2416/pm.c
+++ b/arch/arm/mach-s3c2416/pm.c
@@ -11,6 +11,7 @@
 */
 
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -55,30 +56,26 @@ static int s3c2416_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s3c2416_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2416_pm_driver = {
+	.add		= s3c2416_pm_add,
+};
+
+static __init int s3c2416_pm_init(void)
 {
-	return 0;
+	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
 }
 
-static int s3c2416_pm_resume(struct sys_device *dev)
+arch_initcall(s3c2416_pm_init);
+
+
+static void s3c2416_pm_resume(void)
 {
 	/* unset the return-from-sleep amd inform flags */
 	__raw_writel(0x0, S3C2443_PWRMODE);
 	__raw_writel(0x0, S3C2412_INFORM0);
 	__raw_writel(0x0, S3C2412_INFORM1);
-
-	return 0;
 }
 
-static struct sysdev_driver s3c2416_pm_driver = {
-	.add		= s3c2416_pm_add,
-	.suspend	= s3c2416_pm_suspend,
+struct syscore_ops s3c2416_pm_syscore_ops = {
 	.resume		= s3c2416_pm_resume,
 };
-
-static __init int s3c2416_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
-}
-
-arch_initcall(s3c2416_pm_init);
diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c
index ba7fd8737434..494ce913dc95 100644
--- a/arch/arm/mach-s3c2416/s3c2416.c
+++ b/arch/arm/mach-s3c2416/s3c2416.c
@@ -32,6 +32,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -54,6 +55,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/sdhci.h>
+#include <plat/pm.h>
 
 #include <plat/iic-core.h>
 #include <plat/fb-core.h>
@@ -95,6 +97,9 @@ int __init s3c2416_init(void)
 
 	s3c_fb_setname("s3c2443-fb");
 
+	register_syscore_ops(&s3c2416_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2416_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/mach-gta02.c b/arch/arm/mach-s3c2440/mach-gta02.c
index 0db2411ef4bb..716662008ce2 100644
--- a/arch/arm/mach-s3c2440/mach-gta02.c
+++ b/arch/arm/mach-s3c2440/mach-gta02.c
@@ -409,6 +409,10 @@ struct platform_device s3c24xx_pwm_device = {
 	.num_resources	= 0,
 };
 
+static struct platform_device gta02_dfbmcs320_device = {
+	.name = "dfbmcs320",
+};
+
 static struct i2c_board_info gta02_i2c_devs[] __initdata = {
 	{
 		I2C_BOARD_INFO("pcf50633", 0x73),
@@ -523,6 +527,7 @@ static struct platform_device *gta02_devices[] __initdata = {
 	&s3c_device_iis,
 	&samsung_asoc_dma,
 	&s3c_device_i2c0,
+	&gta02_dfbmcs320_device,
 	&gta02_buttons_device,
 	&s3c_device_adc,
 	&s3c_device_ts,
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index 14dc67897757..d88536393310 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/gpio.h>
 #include <linux/device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/clk.h>
 #include <linux/i2c.h>
@@ -284,7 +284,7 @@ static struct platform_device osiris_pcmcia = {
 #ifdef CONFIG_PM
 static unsigned char pm_osiris_ctrl0;
 
-static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int osiris_pm_suspend(void)
 {
 	unsigned int tmp;
 
@@ -304,7 +304,7 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int osiris_pm_resume(struct sys_device *sd)
+static void osiris_pm_resume(void)
 {
 	if (pm_osiris_ctrl0 & OSIRIS_CTRL0_FIX8)
 		__raw_writeb(OSIRIS_CTRL1_FIX8, OSIRIS_VA_CTRL1);
@@ -312,8 +312,6 @@ static int osiris_pm_resume(struct sys_device *sd)
 	__raw_writeb(pm_osiris_ctrl0, OSIRIS_VA_CTRL0);
 
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-
-	return 0;
 }
 
 #else
@@ -321,16 +319,11 @@ static int osiris_pm_resume(struct sys_device *sd)
 #define osiris_pm_resume NULL
 #endif
 
-static struct sysdev_class osiris_pm_sysclass = {
-	.name		= "mach-osiris",
+static struct syscore_ops osiris_pm_syscore_ops = {
 	.suspend	= osiris_pm_suspend,
 	.resume		= osiris_pm_resume,
 };
 
-static struct sys_device osiris_pm_sysdev = {
-	.cls		= &osiris_pm_sysclass,
-};
-
 /* Link for DVS driver to TPS65011 */
 
 static void osiris_tps_release(struct device *dev)
@@ -439,8 +432,7 @@ static void __init osiris_map_io(void)
 
 static void __init osiris_init(void)
 {
-	sysdev_class_register(&osiris_pm_sysclass);
-	sysdev_register(&osiris_pm_sysdev);
+	register_syscore_ops(&osiris_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(NULL);
 	s3c_nand_set_platdata(&osiris_nand_info);
diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c
index f7663f731ea0..ce99ff72838d 100644
--- a/arch/arm/mach-s3c2440/s3c2440.c
+++ b/arch/arm/mach-s3c2440/s3c2440.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -33,6 +34,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -51,6 +53,12 @@ int __init s3c2440_init(void)
 	s3c_device_wdt.resource[1].start = IRQ_S3C2440_WDT;
 	s3c_device_wdt.resource[1].end   = IRQ_S3C2440_WDT;
 
+	/* register suspend/resume handlers */
+
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	/* register our system device for everything else */
 
 	return sysdev_register(&s3c2440_sysdev);
diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c
index ecf813546554..6224bad4d604 100644
--- a/arch/arm/mach-s3c2440/s3c2442.c
+++ b/arch/arm/mach-s3c2440/s3c2442.c
@@ -29,6 +29,7 @@
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/mutex.h>
@@ -45,6 +46,7 @@
 #include <plat/clock.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -167,6 +169,10 @@ int __init s3c2442_init(void)
 {
 	printk("S3C2442: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2442_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/s3c244x-irq.c b/arch/arm/mach-s3c2440/s3c244x-irq.c
index de07c2feaa32..c63e8f26d901 100644
--- a/arch/arm/mach-s3c2440/s3c244x-irq.c
+++ b/arch/arm/mach-s3c2440/s3c244x-irq.c
@@ -116,8 +116,6 @@ static int s3c244x_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2440_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2440_irq_init(void)
@@ -129,8 +127,6 @@ arch_initcall(s3c2440_irq_init);
 
 static struct sysdev_driver s3c2442_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 
diff --git a/arch/arm/mach-s3c2440/s3c244x.c b/arch/arm/mach-s3c2440/s3c244x.c
index 90c1707b9c95..7e8a23d2098a 100644
--- a/arch/arm/mach-s3c2440/s3c244x.c
+++ b/arch/arm/mach-s3c2440/s3c244x.c
@@ -19,6 +19,7 @@
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -134,45 +135,14 @@ void __init s3c244x_init_clocks(int xtal)
 	s3c2410_baseclk_add();
 }
 
-#ifdef CONFIG_PM
-
-static struct sleep_save s3c244x_sleep[] = {
-	SAVE_ITEM(S3C2440_DSC0),
-	SAVE_ITEM(S3C2440_DSC1),
-	SAVE_ITEM(S3C2440_GPJDAT),
-	SAVE_ITEM(S3C2440_GPJCON),
-	SAVE_ITEM(S3C2440_GPJUP)
-};
-
-static int s3c244x_suspend(struct sys_device *dev, pm_message_t state)
-{
-	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-static int s3c244x_resume(struct sys_device *dev)
-{
-	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-#else
-#define s3c244x_suspend NULL
-#define s3c244x_resume  NULL
-#endif
-
 /* Since the S3C2442 and S3C2440 share  items, put both sysclasses here */
 
 struct sysdev_class s3c2440_sysclass = {
 	.name		= "s3c2440-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 struct sysdev_class s3c2442_sysclass = {
 	.name		= "s3c2442-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 /* need to register class before we actually register the device, and
@@ -194,3 +164,33 @@ static int __init s3c2442_core_init(void)
 }
 
 core_initcall(s3c2442_core_init);
+
+
+#ifdef CONFIG_PM
+static struct sleep_save s3c244x_sleep[] = {
+	SAVE_ITEM(S3C2440_DSC0),
+	SAVE_ITEM(S3C2440_DSC1),
+	SAVE_ITEM(S3C2440_GPJDAT),
+	SAVE_ITEM(S3C2440_GPJCON),
+	SAVE_ITEM(S3C2440_GPJUP)
+};
+
+static int s3c244x_suspend(void)
+{
+	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+	return 0;
+}
+
+static void s3c244x_resume(void)
+{
+	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+}
+#else
+#define s3c244x_suspend NULL
+#define s3c244x_resume  NULL
+#endif
+
+struct syscore_ops s3c244x_pm_syscore_ops = {
+	.suspend	= s3c244x_suspend,
+	.resume		= s3c244x_resume,
+};
diff --git a/arch/arm/mach-s3c64xx/irq-pm.c b/arch/arm/mach-s3c64xx/irq-pm.c
index da1bec64b9da..8bec61e242c7 100644
--- a/arch/arm/mach-s3c64xx/irq-pm.c
+++ b/arch/arm/mach-s3c64xx/irq-pm.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/serial_core.h>
 #include <linux/irq.h>
@@ -54,7 +54,7 @@ static struct irq_grp_save {
 
 static u32 irq_uart_mask[CONFIG_SERIAL_SAMSUNG_UARTS];
 
-static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int s3c64xx_irq_pm_suspend(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -75,7 +75,7 @@ static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int s3c64xx_irq_pm_resume(struct sys_device *dev)
+static void s3c64xx_irq_pm_resume(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -94,18 +94,18 @@ static int s3c64xx_irq_pm_resume(struct sys_device *dev)
 	}
 
 	S3C_PMDBG("%s: IRQ configuration restored\n", __func__);
-	return 0;
 }
 
-static struct sysdev_driver s3c64xx_irq_driver = {
+struct syscore_ops s3c64xx_irq_syscore_ops = {
 	.suspend = s3c64xx_irq_pm_suspend,
 	.resume	 = s3c64xx_irq_pm_resume,
 };
 
-static int __init s3c64xx_irq_pm_init(void)
+static __init int s3c64xx_syscore_init(void)
 {
-	return sysdev_driver_register(&s3c64xx_sysclass, &s3c64xx_irq_driver);
-}
+	register_syscore_ops(&s3c64xx_irq_syscore_ops);
 
-arch_initcall(s3c64xx_irq_pm_init);
+	return 0;
+}
 
+core_initcall(s3c64xx_syscore_init);
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index 549d7924fd4c..24febae3d4c0 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <plat/cpu.h>
@@ -140,7 +141,17 @@ static int s5pv210_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s5pv210_pm_resume(struct sys_device *dev)
+static struct sysdev_driver s5pv210_pm_driver = {
+	.add		= s5pv210_pm_add,
+};
+
+static __init int s5pv210_pm_drvinit(void)
+{
+	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+}
+arch_initcall(s5pv210_pm_drvinit);
+
+static void s5pv210_pm_resume(void)
 {
 	u32 tmp;
 
@@ -150,17 +161,15 @@ static int s5pv210_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp , S5P_OTHERS);
 
 	s3c_pm_do_restore_core(s5pv210_core_save, ARRAY_SIZE(s5pv210_core_save));
-
-	return 0;
 }
 
-static struct sysdev_driver s5pv210_pm_driver = {
-	.add		= s5pv210_pm_add,
+static struct syscore_ops s5pv210_pm_syscore_ops = {
 	.resume		= s5pv210_pm_resume,
 };
 
-static __init int s5pv210_pm_drvinit(void)
+static __init int s5pv210_pm_syscore_init(void)
 {
-	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+	register_syscore_ops(&s5pv210_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(s5pv210_pm_drvinit);
+arch_initcall(s5pv210_pm_syscore_init);
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 423ddb3d65e9..dfbf824a69fa 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
@@ -234,7 +234,7 @@ static struct sa1100irq_state {
 	unsigned int	iccr;
 } sa1100irq_state;
 
-static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
+static int sa1100irq_suspend(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -264,7 +264,7 @@ static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int sa1100irq_resume(struct sys_device *dev)
+static void sa1100irq_resume(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -277,24 +277,17 @@ static int sa1100irq_resume(struct sys_device *dev)
 
 		ICMR = st->icmr;
 	}
-	return 0;
 }
 
-static struct sysdev_class sa1100irq_sysclass = {
-	.name		= "sa11x0-irq",
+static struct syscore_ops sa1100irq_syscore_ops = {
 	.suspend	= sa1100irq_suspend,
 	.resume		= sa1100irq_resume,
 };
 
-static struct sys_device sa1100irq_device = {
-	.id		= 0,
-	.cls		= &sa1100irq_sysclass,
-};
-
 static int __init sa1100irq_init_devicefs(void)
 {
-	sysdev_class_register(&sa1100irq_sysclass);
-	return sysdev_register(&sa1100irq_device);
+	register_syscore_ops(&sa1100irq_syscore_ops);
+	return 0;
 }
 
 device_initcall(sa1100irq_init_devicefs);
diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 94912d3944d3..2d1b67a59e4a 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -18,152 +18,41 @@
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
 #include <linux/bitmap.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_PM_RUNTIME
-#define BIT_ONCE 0
-#define BIT_ACTIVE 1
-#define BIT_CLK_ENABLED 2
 
-struct pm_runtime_data {
-	unsigned long flags;
-	struct clk *clk;
-};
-
-static void __devres_release(struct device *dev, void *res)
-{
-	struct pm_runtime_data *prd = res;
-
-	dev_dbg(dev, "__devres_release()\n");
-
-	if (test_bit(BIT_CLK_ENABLED, &prd->flags))
-		clk_disable(prd->clk);
-
-	if (test_bit(BIT_ACTIVE, &prd->flags))
-		clk_put(prd->clk);
-}
-
-static struct pm_runtime_data *__to_prd(struct device *dev)
-{
-	return devres_find(dev, __devres_release, NULL, NULL);
-}
-
-static void platform_pm_runtime_init(struct device *dev,
-				     struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) {
-		prd->clk = clk_get(dev, NULL);
-		if (!IS_ERR(prd->clk)) {
-			set_bit(BIT_ACTIVE, &prd->flags);
-			dev_info(dev, "clocks managed by runtime pm\n");
-		}
-	}
-}
-
-static void platform_pm_runtime_bug(struct device *dev,
-				    struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags))
-		dev_err(dev, "runtime pm suspend before resume\n");
-}
-
-int platform_pm_runtime_suspend(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "platform_pm_runtime_suspend()\n");
-
-	platform_pm_runtime_bug(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_disable(prd->clk);
-		clear_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
-
-int platform_pm_runtime_resume(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "platform_pm_runtime_resume()\n");
-
-	platform_pm_runtime_init(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_enable(prd->clk);
-		set_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
-
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	/* suspend synchronously to disable clocks immediately */
 	return pm_runtime_suspend(dev);
 }
 
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct pm_runtime_data *prd;
-
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
-
-	if (action == BUS_NOTIFY_BIND_DRIVER) {
-		prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL);
-		if (prd)
-			devres_add(dev, prd);
-		else
-			dev_err(dev, "unable to alloc memory for runtime pm\n");
-	}
-
-	return 0;
-}
-
-#else /* CONFIG_PM_RUNTIME */
-
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct clk *clk;
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = pm_runtime_clk_suspend,
+		.runtime_resume = pm_runtime_clk_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
 
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
+#define DEFAULT_PWR_DOMAIN_PTR	(&default_power_domain)
 
-	switch (action) {
-	case BUS_NOTIFY_BIND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_enable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced on\n");
-		}
-		break;
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_disable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced off\n");
-		}
-		break;
-	}
+#else
 
-	return 0;
-}
+#define DEFAULT_PWR_DOMAIN_PTR	NULL
 
 #endif /* CONFIG_PM_RUNTIME */
 
-static struct notifier_block platform_bus_notifier = {
-	.notifier_call = platform_bus_notify
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = DEFAULT_PWR_DOMAIN_PTR,
+	.con_ids = { NULL, },
 };
 
 static int __init sh_pm_runtime_init(void)
 {
-	bus_register_notifier(&platform_bus_type, &platform_bus_notifier);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 	return 0;
 }
 core_initcall(sh_pm_runtime_init);
diff --git a/arch/arm/mach-tegra/gpio.c b/arch/arm/mach-tegra/gpio.c
index 76a3f654220f..65a1aba6823d 100644
--- a/arch/arm/mach-tegra/gpio.c
+++ b/arch/arm/mach-tegra/gpio.c
@@ -257,7 +257,8 @@ static void tegra_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
 void tegra_gpio_resume(void)
 {
 	unsigned long flags;
-	int b, p, i;
+	int b;
+	int p;
 
 	local_irq_save(flags);
 
@@ -280,7 +281,8 @@ void tegra_gpio_resume(void)
 void tegra_gpio_suspend(void)
 {
 	unsigned long flags;
-	int b, p, i;
+	int b;
+	int p;
 
 	local_irq_save(flags);
 	for (b = 0; b < ARRAY_SIZE(tegra_gpio_banks); b++) {
diff --git a/arch/arm/mach-tegra/include/mach/barriers.h b/arch/arm/mach-tegra/include/mach/barriers.h
index cc115174899b..425b42e91ef6 100644
--- a/arch/arm/mach-tegra/include/mach/barriers.h
+++ b/arch/arm/mach-tegra/include/mach/barriers.h
@@ -23,7 +23,7 @@
 
 #include <asm/outercache.h>
 
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		do { dsb(); outer_sync(); } while (0)
 #define mb()		wmb()
 
diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c
index 6d7c4eea4dcb..4459470c052d 100644
--- a/arch/arm/mach-tegra/tegra2_clocks.c
+++ b/arch/arm/mach-tegra/tegra2_clocks.c
@@ -1362,14 +1362,15 @@ static int tegra_clk_shared_bus_set_rate(struct clk *c, unsigned long rate)
 {
 	unsigned long flags;
 	int ret;
+	long new_rate = rate;
 
-	rate = clk_round_rate(c->parent, rate);
-	if (rate < 0)
-		return rate;
+	new_rate = clk_round_rate(c->parent, new_rate);
+	if (new_rate < 0)
+		return new_rate;
 
 	spin_lock_irqsave(&c->parent->spinlock, flags);
 
-	c->u.shared_bus_user.rate = rate;
+	c->u.shared_bus_user.rate = new_rate;
 	ret = tegra_clk_shared_bus_update(c->parent);
 
 	spin_unlock_irqrestore(&c->parent->spinlock, flags);
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index af913741e6ec..6e1907fa94f0 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -178,16 +178,15 @@ static struct i2c_board_info __initdata mop500_i2c0_devices[] = {
 		.irq		= NOMADIK_GPIO_TO_IRQ(217),
 		.platform_data  = &mop500_tc35892_data,
 	},
-};
-
-/* I2C0 devices only available prior to HREFv60 */
-static struct i2c_board_info __initdata mop500_i2c0_old_devices[] = {
+	/* I2C0 devices only available prior to HREFv60 */
 	{
 		I2C_BOARD_INFO("tps61052", 0x33),
 		.platform_data  = &mop500_tps61052_data,
 	},
 };
 
+#define NUM_PRE_V60_I2C0_DEVICES 1
+
 static struct i2c_board_info __initdata mop500_i2c2_devices[] = {
 	{
 		/* lp5521 LED driver, 1st device */
@@ -425,6 +424,8 @@ static void __init mop500_uart_init(void)
 
 static void __init mop500_init_machine(void)
 {
+	int i2c0_devs;
+
 	/*
 	 * The HREFv60 board removed a GPIO expander and routed
 	 * all these GPIO pins to the internal GPIO controller
@@ -448,11 +449,11 @@ static void __init mop500_init_machine(void)
 
 	platform_device_register(&ab8500_device);
 
-	i2c_register_board_info(0, mop500_i2c0_devices,
-				ARRAY_SIZE(mop500_i2c0_devices));
-	if (!machine_is_hrefv60())
-		i2c_register_board_info(0, mop500_i2c0_old_devices,
-					ARRAY_SIZE(mop500_i2c0_old_devices));
+	i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices);
+	if (machine_is_hrefv60())
+		i2c0_devs -= NUM_PRE_V60_I2C0_DEVICES;
+
+	i2c_register_board_info(0, mop500_i2c0_devices, i2c0_devs);
 	i2c_register_board_info(2, mop500_i2c2_devices,
 				ARRAY_SIZE(mop500_i2c2_devices));
 }
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e5f6fc428348..e591513bb53e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -392,7 +392,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * Convert start_pfn/end_pfn to a struct page pointer.
 	 */
 	start_pg = pfn_to_page(start_pfn - 1) + 1;
-	end_pg = pfn_to_page(end_pfn);
+	end_pg = pfn_to_page(end_pfn - 1) + 1;
 
 	/*
 	 * Convert to physical addresses, and
@@ -426,6 +426,14 @@ static void __init free_unused_memmap(struct meminfo *mi)
 
 		bank_start = bank_pfn_start(bank);
 
+#ifdef CONFIG_SPARSEMEM
+		/*
+		 * Take care not to free memmap entries that don't exist
+		 * due to SPARSEMEM sections which aren't present.
+		 */
+		bank_start = min(bank_start,
+				 ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 		/*
 		 * If we had a previous bank, and there is a space
 		 * between the current bank and the previous, free it.
@@ -440,6 +448,12 @@ static void __init free_unused_memmap(struct meminfo *mi)
 		 */
 		prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
 	}
+
+#ifdef CONFIG_SPARSEMEM
+	if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
+		free_memmap(prev_bank_end,
+			    ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 }
 
 static void __init free_highpages(void)
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index afe209e1e1f8..74be05f3e03a 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -7,6 +7,7 @@
 #include <linux/shm.h>
 #include <linux/sched.h>
 #include <linux/io.h>
+#include <linux/personality.h>
 #include <linux/random.h>
 #include <asm/cputype.h>
 #include <asm/system.h>
@@ -82,7 +83,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	        mm->cached_hole_size = 0;
 	}
 	/* 8 bits of randomness in 20 address space bits */
-	if (current->flags & PF_RANDOMIZE)
+	if ((current->flags & PF_RANDOMIZE) &&
+	    !(current->personality & ADDR_NO_RANDOMIZE))
 		addr += (get_random_int() % (1 << 8)) << PAGE_SHIFT;
 
 full_search:
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index b46eb21f05c7..bf8a1d1cccb6 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -390,7 +390,7 @@ ENTRY(cpu_arm920_set_pte_ext)
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm920_suspend_size
 .equ	cpu_arm920_suspend_size, 4 * 3
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm920_do_suspend)
 	stmfd	sp!, {r4 - r7, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 6a4bdb2c94a7..0ed85d930c09 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -404,7 +404,7 @@ ENTRY(cpu_arm926_set_pte_ext)
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm926_suspend_size
 .equ	cpu_arm926_suspend_size, 4 * 3
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm926_do_suspend)
 	stmfd	sp!, {r4 - r7, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 74483d1977fe..184a9c997e36 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -171,7 +171,7 @@ ENTRY(cpu_sa1100_set_pte_ext)
 
 .globl	cpu_sa1100_suspend_size
 .equ	cpu_sa1100_suspend_size, 4*4
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_sa1100_do_suspend)
 	stmfd	sp!, {r4 - r7, lr}
 	mrc	p15, 0, r4, c3, c0, 0		@ domain ID
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index bfa0c9f611c5..7c99cb4c8e4f 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -124,7 +124,7 @@ ENTRY(cpu_v6_set_pte_ext)
 /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
 .globl	cpu_v6_suspend_size
 .equ	cpu_v6_suspend_size, 4 * 8
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_v6_do_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c35618e42f6f..babfba09c89f 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -211,7 +211,7 @@ cpu_v7_name:
 /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
 .globl	cpu_v7_suspend_size
 .equ	cpu_v7_suspend_size, 4 * 8
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_v7_do_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 63d8b2044e84..596213699f37 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -417,7 +417,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
 
 .globl	cpu_xsc3_suspend_size
 .equ	cpu_xsc3_suspend_size, 4 * 8
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xsc3_do_suspend)
 	stmfd	sp!, {r4 - r10, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 086038cd86ab..42af97664c9d 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -395,7 +395,7 @@ ENTRY(xscale_dma_a0_map_area)
 	teq	r2, #DMA_TO_DEVICE
 	beq	xscale_dma_clean_range
 	b	xscale_dma_flush_range
-ENDPROC(xscsale_dma_a0_map_area)
+ENDPROC(xscale_dma_a0_map_area)
 
 /*
  *	dma_unmap_area(start, size, dir)
@@ -518,7 +518,7 @@ ENTRY(cpu_xscale_set_pte_ext)
 
 .globl	cpu_xscale_suspend_size
 .equ	cpu_xscale_suspend_size, 4 * 7
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xscale_do_suspend)
 	stmfd	sp!, {r4 - r10, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
diff --git a/arch/arm/plat-mxc/gpio.c b/arch/arm/plat-mxc/gpio.c
index 7a107246fd98..6cd6d7f686f6 100644
--- a/arch/arm/plat-mxc/gpio.c
+++ b/arch/arm/plat-mxc/gpio.c
@@ -295,6 +295,12 @@ static int mxc_gpio_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different
+ * category than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 {
 	int i, j;
@@ -311,6 +317,7 @@ int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 		__raw_writel(~0, port[i].base + GPIO_ISR);
 		for (j = port[i].virtual_irq_start;
 			j < port[i].virtual_irq_start + 32; j++) {
+			irq_set_lockdep_class(j, &gpio_lock_class);
 			irq_set_chip_and_handler(j, &gpio_irq_chip,
 						 handle_level_irq);
 			set_irq_flags(j, IRQF_VALID);
diff --git a/arch/arm/plat-mxc/ssi-fiq.S b/arch/arm/plat-mxc/ssi-fiq.S
index 4ddce565b353..8397a2dd19f2 100644
--- a/arch/arm/plat-mxc/ssi-fiq.S
+++ b/arch/arm/plat-mxc/ssi-fiq.S
@@ -124,6 +124,8 @@ imx_ssi_fiq_start:
 1:
 		@ return from FIQ
 		subs	pc, lr, #4
+
+		.align
 imx_ssi_fiq_base:
 		.word 0x0
 imx_ssi_fiq_rx_buffer:
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index d2adcdda23cf..bd9e32187eab 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -1372,9 +1372,7 @@ static const struct dev_pm_ops omap_mpuio_dev_pm_ops = {
 	.resume_noirq = omap_mpuio_resume_noirq,
 };
 
-/* use platform_driver for this, now that there's no longer any
- * point to sys_device (other than not disturbing old code).
- */
+/* use platform_driver for this. */
 static struct platform_driver omap_mpuio_driver = {
 	.driver		= {
 		.name	= "mpuio",
@@ -1745,7 +1743,7 @@ static int __devinit omap_gpio_probe(struct platform_device *pdev)
 }
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
+static int omap_gpio_suspend(void)
 {
 	int i;
 
@@ -1795,12 +1793,12 @@ static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
 	return 0;
 }
 
-static int omap_gpio_resume(struct sys_device *dev)
+static void omap_gpio_resume(void)
 {
 	int i;
 
 	if (!cpu_class_is_omap2() && !cpu_is_omap16xx())
-		return 0;
+		return;
 
 	for (i = 0; i < gpio_bank_count; i++) {
 		struct gpio_bank *bank = &gpio_bank[i];
@@ -1836,21 +1834,13 @@ static int omap_gpio_resume(struct sys_device *dev)
 		__raw_writel(bank->saved_wakeup, wake_set);
 		spin_unlock_irqrestore(&bank->lock, flags);
 	}
-
-	return 0;
 }
 
-static struct sysdev_class omap_gpio_sysclass = {
-	.name		= "gpio",
+static struct syscore_ops omap_gpio_syscore_ops = {
 	.suspend	= omap_gpio_suspend,
 	.resume		= omap_gpio_resume,
 };
 
-static struct sys_device omap_gpio_device = {
-	.id		= 0,
-	.cls		= &omap_gpio_sysclass,
-};
-
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
@@ -2108,21 +2098,14 @@ postcore_initcall(omap_gpio_drv_reg);
 
 static int __init omap_gpio_sysinit(void)
 {
-	int ret = 0;
-
 	mpuio_init();
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
-		if (ret == 0) {
-			ret = sysdev_class_register(&omap_gpio_sysclass);
-			if (ret == 0)
-				ret = sysdev_register(&omap_gpio_device);
-		}
-	}
+	if (cpu_is_omap16xx() || cpu_class_is_omap2())
+		register_syscore_ops(&omap_gpio_syscore_ops);
 #endif
 
-	return ret;
+	return 0;
 }
 
 arch_initcall(omap_gpio_sysinit);
diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
index 8a51fd58f656..34fc31ee9081 100644
--- a/arch/arm/plat-omap/iommu.c
+++ b/arch/arm/plat-omap/iommu.c
@@ -793,6 +793,8 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 	clk_enable(obj->clk);
 	errs = iommu_report_fault(obj, &da);
 	clk_disable(obj->clk);
+	if (errs == 0)
+		return IRQ_HANDLED;
 
 	/* Fault callback or TLB/PTE Dynamic loading */
 	if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv))
diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index 9bbda9acb73b..a37b8eb65b76 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -536,6 +536,28 @@ int omap_early_device_register(struct omap_device *od)
 	return 0;
 }
 
+static int _od_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_idle(pdev);
+}
+
+static int _od_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_enable(pdev);
+}
+
+static struct dev_power_domain omap_device_power_domain = {
+	.ops = {
+		.runtime_suspend = _od_runtime_suspend,
+		.runtime_resume = _od_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	}
+};
+
 /**
  * omap_device_register - register an omap_device with one omap_hwmod
  * @od: struct omap_device * to register
@@ -549,6 +571,7 @@ int omap_device_register(struct omap_device *od)
 	pr_debug("omap_device: %s: registering\n", od->pdev.name);
 
 	od->pdev.dev.parent = &omap_device_parent;
+	od->pdev.dev.pwr_domain = &omap_device_power_domain;
 	return platform_device_register(&od->pdev);
 }
 
diff --git a/arch/arm/plat-pxa/gpio.c b/arch/arm/plat-pxa/gpio.c
index dce088f45678..48ebb9479b61 100644
--- a/arch/arm/plat-pxa/gpio.c
+++ b/arch/arm/plat-pxa/gpio.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 
 #include <mach/gpio.h>
@@ -295,7 +295,7 @@ void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn)
 }
 
 #ifdef CONFIG_PM
-static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_gpio_suspend(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -312,7 +312,7 @@ static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_gpio_resume(struct sys_device *dev)
+static void pxa_gpio_resume(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -326,22 +326,13 @@ static int pxa_gpio_resume(struct sys_device *dev)
 		__raw_writel(c->saved_gfer, c->regbase + GFER_OFFSET);
 		__raw_writel(c->saved_gpdr, c->regbase + GPDR_OFFSET);
 	}
-	return 0;
 }
 #else
 #define pxa_gpio_suspend	NULL
 #define pxa_gpio_resume		NULL
 #endif
 
-struct sysdev_class pxa_gpio_sysclass = {
-	.name		= "gpio",
+struct syscore_ops pxa_gpio_syscore_ops = {
 	.suspend	= pxa_gpio_suspend,
 	.resume		= pxa_gpio_resume,
 };
-
-static int __init pxa_gpio_init(void)
-{
-	return sysdev_class_register(&pxa_gpio_sysclass);
-}
-
-core_initcall(pxa_gpio_init);
diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c
index a9aa5ad3f4eb..be12eadcce20 100644
--- a/arch/arm/plat-pxa/mfp.c
+++ b/arch/arm/plat-pxa/mfp.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
 
 #include <plat/mfp.h>
 
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 27ea852e3370..c10d10c56e2e 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -22,7 +22,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/io.h>
@@ -1195,19 +1195,12 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d
 
 EXPORT_SYMBOL(s3c2410_dma_getposition);
 
-static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev)
-{
-	return container_of(dev, struct s3c2410_dma_chan, dev);
-}
-
-/* system device class */
+/* system core operations */
 
 #ifdef CONFIG_PM
 
-static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
+static void s3c2410_dma_suspend_chan(struct s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
-
 	printk(KERN_DEBUG "suspending dma channel %d\n", cp->number);
 
 	if (dma_rdreg(cp, S3C2410_DMA_DMASKTRIG) & S3C2410_DMASKTRIG_ON) {
@@ -1222,13 +1215,21 @@ static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
 
 		s3c2410_dma_dostop(cp);
 	}
+}
+
+static int s3c2410_dma_suspend(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans;
+	int channel;
+
+	for (channel = 0; channel < dma_channels; cp++, channel++)
+		s3c2410_dma_suspend_chan(cp);
 
 	return 0;
 }
 
-static int s3c2410_dma_resume(struct sys_device *dev)
+static void s3c2410_dma_resume_chan(struct s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
 	unsigned int no = cp->number | DMACH_LOW_LEVEL;
 
 	/* restore channel's hardware configuration */
@@ -1249,13 +1250,21 @@ static int s3c2410_dma_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void s3c2410_dma_resume(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1;
+	int channel;
+
+	for (channel = dma_channels - 1; channel >= 0; cp++, channel--)
+		s3c2410_dma_resume_chan(cp);
+}
+
 #else
 #define s3c2410_dma_suspend NULL
 #define s3c2410_dma_resume  NULL
 #endif /* CONFIG_PM */
 
-struct sysdev_class dma_sysclass = {
-	.name		= "s3c24xx-dma",
+struct syscore_ops dma_syscore_ops = {
 	.suspend	= s3c2410_dma_suspend,
 	.resume		= s3c2410_dma_resume,
 };
@@ -1269,39 +1278,14 @@ static void s3c2410_dma_cache_ctor(void *p)
 
 /* initialisation code */
 
-static int __init s3c24xx_dma_sysclass_init(void)
+static int __init s3c24xx_dma_syscore_init(void)
 {
-	int ret = sysdev_class_register(&dma_sysclass);
-
-	if (ret != 0)
-		printk(KERN_ERR "dma sysclass registration failed\n");
-
-	return ret;
-}
-
-core_initcall(s3c24xx_dma_sysclass_init);
-
-static int __init s3c24xx_dma_sysdev_register(void)
-{
-	struct s3c2410_dma_chan *cp = s3c2410_chans;
-	int channel, ret;
-
-	for (channel = 0; channel < dma_channels; cp++, channel++) {
-		cp->dev.cls = &dma_sysclass;
-		cp->dev.id  = channel;
-		ret = sysdev_register(&cp->dev);
-
-		if (ret) {
-			printk(KERN_ERR "error registering dev for dma %d\n",
-			       channel);
- 			return ret;
-		}
-	}
+	register_syscore_ops(&dma_syscore_ops);
 
 	return 0;
 }
 
-late_initcall(s3c24xx_dma_sysdev_register);
+late_initcall(s3c24xx_dma_syscore_init);
 
 int __init s3c24xx_dma_init(unsigned int channels, unsigned int irq,
 			    unsigned int stride)
diff --git a/arch/arm/plat-s3c24xx/irq-pm.c b/arch/arm/plat-s3c24xx/irq-pm.c
index c3624d898630..0efb2e2848c8 100644
--- a/arch/arm/plat-s3c24xx/irq-pm.c
+++ b/arch/arm/plat-s3c24xx/irq-pm.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 
 #include <plat/cpu.h>
@@ -65,7 +64,7 @@ static unsigned long save_extint[3];
 static unsigned long save_eintflt[4];
 static unsigned long save_eintmask;
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	unsigned int i;
 
@@ -81,7 +80,7 @@ int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	unsigned int i;
 
@@ -93,6 +92,4 @@ int s3c24xx_irq_resume(struct sys_device *dev)
 
 	s3c_pm_do_restore(irq_save, ARRAY_SIZE(irq_save));
 	__raw_writel(save_eintmask, S3C24XX_EINTMASK);
-
-	return 0;
 }
diff --git a/arch/arm/plat-s5p/irq-pm.c b/arch/arm/plat-s5p/irq-pm.c
index 5259ad458bc8..327acb3a4464 100644
--- a/arch/arm/plat-s5p/irq-pm.c
+++ b/arch/arm/plat-s5p/irq-pm.c
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 
 #include <plat/cpu.h>
 #include <plat/irqs.h>
@@ -77,17 +76,15 @@ static struct sleep_save eint_save[] = {
 	SAVE_ITEM(S5P_EINT_MASK(3)),
 };
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	s3c_pm_do_save(eint_save, ARRAY_SIZE(eint_save));
 
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	s3c_pm_do_restore(eint_save, ARRAY_SIZE(eint_save));
-
-	return 0;
 }
 
diff --git a/arch/arm/plat-s5p/pm.c b/arch/arm/plat-s5p/pm.c
index d592b6304b48..d15dc47b0e3d 100644
--- a/arch/arm/plat-s5p/pm.c
+++ b/arch/arm/plat-s5p/pm.c
@@ -19,17 +19,6 @@
 
 #define PFX "s5p pm: "
 
-/* s3c_pm_check_resume_pin
- *
- * check to see if the pin is configured correctly for sleep mode, and
- * make any necessary adjustments if it is not
-*/
-
-static void s3c_pm_check_resume_pin(unsigned int pin, unsigned int irqoffs)
-{
-	/* nothing here yet */
-}
-
 /* s3c_pm_configure_extint
  *
  * configure all external interrupt pins
diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h
index cedfff51c82b..3aedac0034ba 100644
--- a/arch/arm/plat-samsung/include/plat/cpu.h
+++ b/arch/arm/plat-samsung/include/plat/cpu.h
@@ -68,6 +68,12 @@ extern void s3c24xx_init_uartdevs(char *name,
 struct sys_timer;
 extern struct sys_timer s3c24xx_timer;
 
+extern struct syscore_ops s3c2410_pm_syscore_ops;
+extern struct syscore_ops s3c2412_pm_syscore_ops;
+extern struct syscore_ops s3c2416_pm_syscore_ops;
+extern struct syscore_ops s3c244x_pm_syscore_ops;
+extern struct syscore_ops s3c64xx_irq_syscore_ops;
+
 /* system device classes */
 
 extern struct sysdev_class s3c2410_sysclass;
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 937cc2ace517..7fb6f6be8c81 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -103,14 +103,16 @@ extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count);
 
 #ifdef CONFIG_PM
 extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
-extern int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state);
-extern int s3c24xx_irq_resume(struct sys_device *dev);
+extern int s3c24xx_irq_suspend(void);
+extern void s3c24xx_irq_resume(void);
 #else
 #define s3c_irqext_wake NULL
 #define s3c24xx_irq_suspend NULL
 #define s3c24xx_irq_resume  NULL
 #endif
 
+extern struct syscore_ops s3c24xx_irq_syscore_ops;
+
 /* PM debug functions */
 
 #ifdef CONFIG_SAMSUNG_PM_DEBUG
diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index e4baf76f374a..6b733fafe7cd 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c
@@ -164,7 +164,6 @@ static inline int in_region(void *ptr, int size, void *what, size_t whatsz)
  */
 static u32 *s3c_pm_runcheck(struct resource *res, u32 *val)
 {
-	void *save_at = phys_to_virt(s3c_sleep_save_phys);
 	unsigned long addr;
 	unsigned long left;
 	void *stkpage;
@@ -192,11 +191,6 @@ static u32 *s3c_pm_runcheck(struct resource *res, u32 *val)
 			goto skip_check;
 		}
 
-		if (in_region(ptr, left, save_at, 32*4 )) {
-			S3C_PMDBG("skipping %08lx, has save block in\n", addr);
-			goto skip_check;
-		}
-
 		/* calculate and check the checksum */
 
 		calc = crc32_le(~0, ptr, left);
diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c
index d5b58d31903c..5c0a440d6e16 100644
--- a/arch/arm/plat-samsung/pm.c
+++ b/arch/arm/plat-samsung/pm.c
@@ -214,8 +214,9 @@ void s3c_pm_do_restore_core(struct sleep_save *ptr, int count)
  *
  * print any IRQs asserted at resume time (ie, we woke from)
 */
-static void s3c_pm_show_resume_irqs(int start, unsigned long which,
-				    unsigned long mask)
+static void __maybe_unused s3c_pm_show_resume_irqs(int start,
+						   unsigned long which,
+						   unsigned long mask)
 {
 	int i;
 
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index bbf3da012afd..f25e7ec89416 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -78,6 +78,14 @@ static void vfp_thread_exit(struct thread_info *thread)
 	put_cpu();
 }
 
+static void vfp_thread_copy(struct thread_info *thread)
+{
+	struct thread_info *parent = current_thread_info();
+
+	vfp_sync_hwstate(parent);
+	thread->vfpstate = parent->vfpstate;
+}
+
 /*
  * When this function is called with the following 'cmd's, the following
  * is true while this function is being run:
@@ -104,12 +112,17 @@ static void vfp_thread_exit(struct thread_info *thread)
 static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v)
 {
 	struct thread_info *thread = v;
+	u32 fpexc;
+#ifdef CONFIG_SMP
+	unsigned int cpu;
+#endif
 
-	if (likely(cmd == THREAD_NOTIFY_SWITCH)) {
-		u32 fpexc = fmrx(FPEXC);
+	switch (cmd) {
+	case THREAD_NOTIFY_SWITCH:
+		fpexc = fmrx(FPEXC);
 
 #ifdef CONFIG_SMP
-		unsigned int cpu = thread->cpu;
+		cpu = thread->cpu;
 
 		/*
 		 * On SMP, if VFP is enabled, save the old state in
@@ -134,13 +147,20 @@ static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v)
 		 * old state.
 		 */
 		fmxr(FPEXC, fpexc & ~FPEXC_EN);
-		return NOTIFY_DONE;
-	}
+		break;
 
-	if (cmd == THREAD_NOTIFY_FLUSH)
+	case THREAD_NOTIFY_FLUSH:
 		vfp_thread_flush(thread);
-	else
+		break;
+
+	case THREAD_NOTIFY_EXIT:
 		vfp_thread_exit(thread);
+		break;
+
+	case THREAD_NOTIFY_COPY:
+		vfp_thread_copy(thread);
+		break;
+	}
 
 	return NOTIFY_DONE;
 }
@@ -378,9 +398,9 @@ static void vfp_enable(void *unused)
 }
 
 #ifdef CONFIG_PM
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
-static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int vfp_pm_suspend(void)
 {
 	struct thread_info *ti = current_thread_info();
 	u32 fpexc = fmrx(FPEXC);
@@ -400,34 +420,25 @@ static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int vfp_pm_resume(struct sys_device *dev)
+static void vfp_pm_resume(void)
 {
 	/* ensure we have access to the vfp */
 	vfp_enable(NULL);
 
 	/* and disable it to ensure the next usage restores the state */
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
-
-	return 0;
 }
 
-static struct sysdev_class vfp_pm_sysclass = {
-	.name		= "vfp",
+static struct syscore_ops vfp_pm_syscore_ops = {
 	.suspend	= vfp_pm_suspend,
 	.resume		= vfp_pm_resume,
 };
 
-static struct sys_device vfp_pm_sysdev = {
-	.cls	= &vfp_pm_sysclass,
-};
-
 static void vfp_pm_init(void)
 {
-	sysdev_class_register(&vfp_pm_sysclass);
-	sysdev_register(&vfp_pm_sysdev);
+	register_syscore_ops(&vfp_pm_syscore_ops);
 }
 
-
 #else
 static inline void vfp_pm_init(void) { }
 #endif /* CONFIG_PM */
diff --git a/arch/avr32/include/asm/setup.h b/arch/avr32/include/asm/setup.h
index ff5b7cf6be4d..160543dbec7e 100644
--- a/arch/avr32/include/asm/setup.h
+++ b/arch/avr32/include/asm/setup.h
@@ -94,6 +94,13 @@ struct tag_ethernet {
 
 #define ETH_INVALID_PHY	0xff
 
+/* board information */
+#define ATAG_BOARDINFO	0x54410008
+
+struct tag_boardinfo {
+	u32	board_number;
+};
+
 struct tag {
 	struct tag_header hdr;
 	union {
@@ -102,6 +109,7 @@ struct tag {
 		struct tag_cmdline cmdline;
 		struct tag_clock clock;
 		struct tag_ethernet ethernet;
+		struct tag_boardinfo boardinfo;
 	} u;
 };
 
@@ -128,6 +136,7 @@ extern struct tag *bootloader_tags;
 
 extern resource_size_t fbmem_start;
 extern resource_size_t fbmem_size;
+extern u32 board_number;
 
 void setup_processor(void);
 
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 5c7083916c33..bb0974cce4ac 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -391,6 +391,21 @@ static int __init parse_tag_clock(struct tag *tag)
 __tagtable(ATAG_CLOCK, parse_tag_clock);
 
 /*
+ * The board_number corresponds to the bd->bi_board_number in U-Boot. This
+ * parameter is only available during initialisation and can be used in some
+ * kind of board identification.
+ */
+u32 __initdata board_number;
+
+static int __init parse_tag_boardinfo(struct tag *tag)
+{
+	board_number = tag->u.boardinfo.board_number;
+
+	return 0;
+}
+__tagtable(ATAG_BOARDINFO, parse_tag_boardinfo);
+
+/*
  * Scan the tag table for this tag, and call its parse function. The
  * tag table is built by the linker from all the __tagtable
  * declarations.
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index b91b2044af9c..7aa25756412f 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -95,28 +95,6 @@ void _exception(long signr, struct pt_regs *regs, int code,
 	info.si_code = code;
 	info.si_addr = (void __user *)addr;
 	force_sig_info(signr, &info, current);
-
-	/*
-	 * Init gets no signals that it doesn't have a handler for.
-	 * That's all very well, but if it has caused a synchronous
-	 * exception and we ignore the resulting signal, it will just
-	 * generate the same exception over and over again and we get
-	 * nowhere.  Better to kill it and let the kernel panic.
-	 */
-	if (is_global_init(current)) {
-		__sighandler_t handler;
-
-		spin_lock_irq(&current->sighand->siglock);
-		handler = current->sighand->action[signr-1].sa.sa_handler;
-		spin_unlock_irq(&current->sighand->siglock);
-		if (handler == SIG_DFL) {
-			/* init has generated a synchronous exception
-			   and it doesn't have a handler for the signal */
-			printk(KERN_CRIT "init has generated signal %ld "
-			       "but has no handler for it\n", signr);
-			do_exit(signr);
-		}
-	}
 }
 
 asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs)
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
index 442f08c5e641..86925fd6ea5b 100644
--- a/arch/avr32/mach-at32ap/clock.c
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -35,22 +35,30 @@ void at32_clk_register(struct clk *clk)
 	spin_unlock(&clk_list_lock);
 }
 
-struct clk *clk_get(struct device *dev, const char *id)
+static struct clk *__clk_get(struct device *dev, const char *id)
 {
 	struct clk *clk;
 
-	spin_lock(&clk_list_lock);
-
 	list_for_each_entry(clk, &at32_clock_list, list) {
 		if (clk->dev == dev && strcmp(id, clk->name) == 0) {
-			spin_unlock(&clk_list_lock);
 			return clk;
 		}
 	}
 
-	spin_unlock(&clk_list_lock);
 	return ERR_PTR(-ENOENT);
 }
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	struct clk *clk;
+
+	spin_lock(&clk_list_lock);
+	clk = __clk_get(dev, id);
+	spin_unlock(&clk_list_lock);
+
+	return clk;
+}
+
 EXPORT_SYMBOL(clk_get);
 
 void clk_put(struct clk *clk)
@@ -257,15 +265,15 @@ static int clk_show(struct seq_file *s, void *unused)
 	spin_lock(&clk_list_lock);
 
 	/* show clock tree as derived from the three oscillators */
-	clk = clk_get(NULL, "osc32k");
+	clk = __clk_get(NULL, "osc32k");
 	dump_clock(clk, &r);
 	clk_put(clk);
 
-	clk = clk_get(NULL, "osc0");
+	clk = __clk_get(NULL, "osc0");
 	dump_clock(clk, &r);
 	clk_put(clk);
 
-	clk = clk_get(NULL, "osc1");
+	clk = __clk_get(NULL, "osc1");
 	dump_clock(clk, &r);
 	clk_put(clk);
 
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index 47ba4b9b6db1..fbc2aeaebddb 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -61,34 +61,34 @@ struct eic {
 static struct eic *nmi_eic;
 static bool nmi_enabled;
 
-static void eic_ack_irq(struct irq_chip *d)
+static void eic_ack_irq(struct irq_data *d)
 {
-	struct eic *eic = irq_data_get_irq_chip_data(data);
+	struct eic *eic = irq_data_get_irq_chip_data(d);
 	eic_writel(eic, ICR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_mask_irq(struct irq_chip *d)
+static void eic_mask_irq(struct irq_data *d)
 {
-	struct eic *eic = irq_data_get_irq_chip_data(data);
+	struct eic *eic = irq_data_get_irq_chip_data(d);
 	eic_writel(eic, IDR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_mask_ack_irq(struct irq_chip *d)
+static void eic_mask_ack_irq(struct irq_data *d)
 {
-	struct eic *eic = irq_data_get_irq_chip_data(data);
+	struct eic *eic = irq_data_get_irq_chip_data(d);
 	eic_writel(eic, ICR, 1 << (d->irq - eic->first_irq));
 	eic_writel(eic, IDR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_unmask_irq(struct irq_chip *d)
+static void eic_unmask_irq(struct irq_data *d)
 {
-	struct eic *eic = irq_data_get_irq_chip_data(data);
+	struct eic *eic = irq_data_get_irq_chip_data(d);
 	eic_writel(eic, IER, 1 << (d->irq - eic->first_irq));
 }
 
-static int eic_set_irq_type(struct irq_chip *d, unsigned int flow_type)
+static int eic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
-	struct eic *eic = irq_data_get_irq_chip_data(data);
+	struct eic *eic = irq_data_get_irq_chip_data(d);
 	unsigned int irq = d->irq;
 	unsigned int i = irq - eic->first_irq;
 	u32 mode, edge, level;
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	int_irq = platform_get_irq(pdev, 0);
-	if (!regs || !int_irq) {
+	if (!regs || (int)int_irq <= 0) {
 		dev_dbg(&pdev->dev, "missing regs and/or irq resource\n");
 		return -ENXIO;
 	}
diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
index 21ce35f33aa5..3e3646186c9f 100644
--- a/arch/avr32/mach-at32ap/intc.c
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/io.h>
 
@@ -21,7 +21,6 @@
 struct intc {
 	void __iomem		*regs;
 	struct irq_chip		chip;
-	struct sys_device	sysdev;
 #ifdef CONFIG_PM
 	unsigned long		suspend_ipr;
 	unsigned long		saved_ipr[64];
@@ -146,9 +145,8 @@ void intc_set_suspend_handler(unsigned long offset)
 	intc0.suspend_ipr = offset;
 }
 
-static int intc_suspend(struct sys_device *sdev, pm_message_t state)
+static int intc_suspend(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
 	if (unlikely(!irqs_disabled())) {
@@ -156,28 +154,25 @@ static int intc_suspend(struct sys_device *sdev, pm_message_t state)
 		return -EINVAL;
 	}
 
-	if (unlikely(!intc->suspend_ipr)) {
+	if (unlikely(!intc0.suspend_ipr)) {
 		pr_err("intc_suspend: suspend_ipr not initialized\n");
 		return -EINVAL;
 	}
 
 	for (i = 0; i < 64; i++) {
-		intc->saved_ipr[i] = intc_readl(intc, INTPR0 + 4 * i);
-		intc_writel(intc, INTPR0 + 4 * i, intc->suspend_ipr);
+		intc0.saved_ipr[i] = intc_readl(&intc0, INTPR0 + 4 * i);
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.suspend_ipr);
 	}
 
 	return 0;
 }
 
-static int intc_resume(struct sys_device *sdev)
+/* syscore_ops resume hook: writes back the INTPR values saved at suspend. */
+static void intc_resume(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
-	WARN_ON(!irqs_disabled());
-
+	/* restore all 64 INTPR registers from saved_ipr[] */
 	for (i = 0; i < 64; i++)
-		intc_writel(intc, INTPR0 + 4 * i, intc->saved_ipr[i]);
-
-	return 0;
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.saved_ipr[i]);
 }
@@ -186,27 +181,18 @@ static int intc_resume(struct sys_device *sdev)
 #define intc_resume	NULL
 #endif
 
-static struct sysdev_class intc_class = {
-	.name		= "intc",
+static struct syscore_ops intc_syscore_ops = {
 	.suspend	= intc_suspend,
 	.resume		= intc_resume,
 };
 
-static int __init intc_init_sysdev(void)
+static int __init intc_init_syscore(void)
 {
-	int ret;
-
-	ret = sysdev_class_register(&intc_class);
-	if (ret)
-		return ret;
+	register_syscore_ops(&intc_syscore_ops);
 
-	intc0.sysdev.id = 0;
-	intc0.sysdev.cls = &intc_class;
-	ret = sysdev_register(&intc0.sysdev);
-
-	return ret;
+	return 0;
 }
-device_initcall(intc_init_sysdev);
+device_initcall(intc_init_syscore);
 
 unsigned long intc_get_pending(unsigned int group)
 {
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index f308e1ddc629..2e0aa853a4bc 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -257,7 +257,7 @@ static void gpio_irq_mask(struct irq_data *d)
 	pio_writel(pio, IDR, 1 << (gpio & 0x1f));
 }
 
-static void gpio_irq_unmask(struct irq_data *d))
+static void gpio_irq_unmask(struct irq_data *d)
 {
 	unsigned		gpio = irq_to_gpio(d->irq);
 	struct pio_device	*pio = &pio_dev[gpio >> 5];
diff --git a/arch/avr32/mach-at32ap/pm-at32ap700x.S b/arch/avr32/mach-at32ap/pm-at32ap700x.S
index 17503b0ed6c9..f868f4ce761b 100644
--- a/arch/avr32/mach-at32ap/pm-at32ap700x.S
+++ b/arch/avr32/mach-at32ap/pm-at32ap700x.S
@@ -53,7 +53,7 @@ cpu_enter_idle:
 	st.w	r8[TI_flags], r9
 	unmask_interrupts
 	sleep	CPU_SLEEP_IDLE
-	.size	cpu_idle_sleep, . - cpu_idle_sleep
+	.size	cpu_enter_idle, . - cpu_enter_idle
 
 	/*
 	 * Common return path for PM functions that don't run from
diff --git a/arch/blackfin/include/asm/system.h b/arch/blackfin/include/asm/system.h
index 19e2c7c3e63a..44bd0cced725 100644
--- a/arch/blackfin/include/asm/system.h
+++ b/arch/blackfin/include/asm/system.h
@@ -19,11 +19,11 @@
  * Force strict CPU ordering.
  */
 #define nop()  __asm__ __volatile__ ("nop;\n\t" : : )
-#define mb()   __asm__ __volatile__ (""   : : : "memory")
-#define rmb()  __asm__ __volatile__ (""   : : : "memory")
-#define wmb()  __asm__ __volatile__ (""   : : : "memory")
-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-#define read_barrier_depends() 		do { } while(0)
+#define smp_mb()  mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_read_barrier_depends()	read_barrier_depends()
 
 #ifdef CONFIG_SMP
 asmlinkage unsigned long __raw_xchg_1_asm(volatile void *ptr, unsigned long value);
@@ -37,16 +37,16 @@ asmlinkage unsigned long __raw_cmpxchg_4_asm(volatile void *ptr,
 					unsigned long new, unsigned long old);
 
 #ifdef __ARCH_SYNC_CORE_DCACHE
-# define smp_mb()	do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
-# define smp_rmb()	do { barrier(); smp_check_barrier(); } while (0)
-# define smp_wmb()	do { barrier(); smp_mark_barrier(); } while (0)
-#define smp_read_barrier_depends()	do { barrier(); smp_check_barrier(); } while (0)
-
+/* Force Core data cache coherence */
+# define mb()	do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
+# define rmb()	do { barrier(); smp_check_barrier(); } while (0)
+# define wmb()	do { barrier(); smp_mark_barrier(); } while (0)
+# define read_barrier_depends()	do { barrier(); smp_check_barrier(); } while (0)
 #else
-# define smp_mb()	barrier()
-# define smp_rmb()	barrier()
-# define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	barrier()
+# define mb()	barrier()
+# define rmb()	barrier()
+# define wmb()	barrier()
+# define read_barrier_depends()	do { } while (0)
 #endif
 
 static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
@@ -99,10 +99,10 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #else /* !CONFIG_SMP */
 
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
+#define mb()	barrier()
+#define rmb()	barrier()
+#define wmb()	barrier()
+#define read_barrier_depends()	do { } while (0)
 
 struct __xchg_dummy {
 	unsigned long a[100];
diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c
index cdbe075de1dc..8b81dc04488a 100644
--- a/arch/blackfin/kernel/gptimers.c
+++ b/arch/blackfin/kernel/gptimers.c
@@ -268,7 +268,7 @@ void disable_gptimers(uint16_t mask)
 	_disable_gptimers(mask);
 	for (i = 0; i < MAX_BLACKFIN_GPTIMERS; ++i)
 		if (mask & (1 << i))
-			group_regs[BFIN_TIMER_OCTET(i)]->status |= trun_mask[i];
+			group_regs[BFIN_TIMER_OCTET(i)]->status = trun_mask[i];
 	SSYNC();
 }
 EXPORT_SYMBOL(disable_gptimers);
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 0b5f72f17fd0..401eb1d8e3b4 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c
@@ -12,7 +12,7 @@
 
 #include <linux/bitops.h>
 #include <linux/hardirq.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/pm.h>
 #include <linux/nmi.h>
 #include <linux/smp.h>
@@ -196,43 +196,31 @@ void touch_nmi_watchdog(void)
 
 /* Suspend/resume support */
 #ifdef CONFIG_PM
-static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
+static int nmi_wdt_suspend(void)
 {
 	nmi_wdt_stop();
 	return 0;
 }
 
-static int nmi_wdt_resume(struct sys_device *dev)
+static void nmi_wdt_resume(void)
 {
 	if (nmi_active)
 		nmi_wdt_start();
-	return 0;
 }
 
-static struct sysdev_class nmi_sysclass = {
-	.name		= DRV_NAME,
+static struct syscore_ops nmi_syscore_ops = {
 	.resume		= nmi_wdt_resume,
 	.suspend	= nmi_wdt_suspend,
 };
 
-static struct sys_device device_nmi_wdt = {
-	.id	= 0,
-	.cls	= &nmi_sysclass,
-};
-
-static int __init init_nmi_wdt_sysfs(void)
+static int __init init_nmi_wdt_syscore(void)
 {
-	int error;
-
-	if (!nmi_active)
-		return 0;
+	if (nmi_active)
+		register_syscore_ops(&nmi_syscore_ops);
 
-	error = sysdev_class_register(&nmi_sysclass);
-	if (!error)
-		error = sysdev_register(&device_nmi_wdt);
-	return error;
+	return 0;
 }
-late_initcall(init_nmi_wdt_sysfs);
+late_initcall(init_nmi_wdt_syscore);
 
 #endif	/* CONFIG_PM */
 
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 8c9a43daf80f..9e9b60d969dc 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -23,29 +23,6 @@
 #include <asm/gptimers.h>
 #include <asm/nmi.h>
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
 #if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
@@ -63,7 +40,6 @@ static struct clocksource bfin_cs_cycles = {
 	.rating		= 400,
 	.read		= bfin_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -75,10 +51,7 @@ static inline unsigned long long bfin_cs_cycles_sched_clock(void)
 
 static int __init bfin_cs_cycles_init(void)
 {
-	bfin_cs_cycles.mult = \
-		clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift);
-
-	if (clocksource_register(&bfin_cs_cycles))
+	if (clocksource_register_hz(&bfin_cs_cycles, get_cclk()))
 		panic("failed to register clocksource");
 
 	return 0;
@@ -111,7 +84,6 @@ static struct clocksource bfin_cs_gptimer0 = {
 	.rating		= 350,
 	.read		= bfin_read_gptimer0,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -125,10 +97,7 @@ static int __init bfin_cs_gptimer0_init(void)
 {
 	setup_gptimer0();
 
-	bfin_cs_gptimer0.mult = \
-		clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift);
-
-	if (clocksource_register(&bfin_cs_gptimer0))
+	if (clocksource_register_hz(&bfin_cs_gptimer0, get_sclk()))
 		panic("failed to register clocksource");
 
 	return 0;
@@ -206,8 +175,14 @@ irqreturn_t bfin_gptmr0_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *evt = dev_id;
 	smp_mb();
-	evt->event_handler(evt);
+	/*
+	 * We want to ACK before we handle so that we can handle smaller timer
+	 * intervals.  This way if the timer expires again while we're handling
+	 * things, we're more likely to see that 2nd int rather than swallowing
+	 * it by ACKing the int at the end of this handler.
+	 */
 	bfin_gptmr0_ack();
+	evt->event_handler(evt);
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c
index 382099fd5561..5e4112e518a9 100644
--- a/arch/blackfin/mach-common/dpmc.c
+++ b/arch/blackfin/mach-common/dpmc.c
@@ -19,9 +19,6 @@
 
 #define DRIVER_NAME "bfin dpmc"
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg)
-
 struct bfin_dpmc_platform_data *pdata;
 
 /**
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 6e17a265c4d3..1fbd94c44457 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -109,10 +109,23 @@ static void ipi_flush_icache(void *info)
 	struct blackfin_flush_data *fdata = info;
 
 	/* Invalidate the memory holding the bounds of the flushed region. */
-	invalidate_dcache_range((unsigned long)fdata,
-		(unsigned long)fdata + sizeof(*fdata));
+	blackfin_dcache_invalidate_range((unsigned long)fdata,
+					 (unsigned long)fdata + sizeof(*fdata));
+
+	/* Make sure all write buffers in the data side of the core
+	 * are flushed before trying to invalidate the icache.  This
+	 * needs to be after the data flush and before the icache
+	 * flush so that the SSYNC does the right thing in preventing
+	 * the instruction prefetcher from hitting things in cached
+	 * memory at the wrong time -- it runs much further ahead than
+	 * the pipeline.
+	 */
+	SSYNC();
 
-	flush_icache_range(fdata->start, fdata->end);
+	/* ipi_flush_icache is invoked by generic flush_icache_range,
+	 * so call blackfin arch icache flush directly here.
+	 */
+	blackfin_icache_flush_range(fdata->start, fdata->end);
 }
 
 static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
@@ -164,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 	while (msg_queue->count) {
 		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
+		case BFIN_IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
 		case BFIN_IPI_CALL_FUNC:
 			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 4c9e3e1ba5d1..66cc75657e2f 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
 
 	ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
 
+	if (ipi.vector & IPI_SCHEDULE) {
+		scheduler_ipi();
+	}
 	if (ipi.vector & IPI_CALL) {
-	         func(info);
+		func(info);
 	}
 	if (ipi.vector & IPI_FLUSH_TLB) {
-		     if (flush_mm == FLUSH_ALL)
-			 __flush_tlb_all();
-		     else if (flush_vma == FLUSH_ALL)
+		if (flush_mm == FLUSH_ALL)
+			__flush_tlb_all();
+		else if (flush_vma == FLUSH_ALL)
 			__flush_tlb_mm(flush_mm);
-		     else
+		else
 			__flush_tlb_page(flush_vma, flush_addr);
 	}
 
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4ce8d1358fee..c04dd576f333 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -37,6 +37,7 @@
 #include <linux/crash_dump.h>
 #include <linux/iommu-helper.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
index 22f61526a8e1..f09b174244d5 100644
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -23,8 +23,6 @@
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
 MODULE_AUTHOR("Venkatesh Pallipadi");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
@@ -47,12 +45,12 @@ processor_set_pstate (
 {
 	s64 retval;
 
-	dprintk("processor_set_pstate\n");
+	pr_debug("processor_set_pstate\n");
 
 	retval = ia64_pal_set_pstate((u64)value);
 
 	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%lx\n",
+		pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n",
 		        value, retval);
 		return -ENODEV;
 	}
@@ -67,14 +65,14 @@ processor_get_pstate (
 	u64	pstate_index = 0;
 	s64 	retval;
 
-	dprintk("processor_get_pstate\n");
+	pr_debug("processor_get_pstate\n");
 
 	retval = ia64_pal_get_pstate(&pstate_index,
 	                             PAL_GET_PSTATE_TYPE_INSTANT);
 	*value = (u32) pstate_index;
 
 	if (retval)
-		dprintk("Failed to get current freq with "
+		pr_debug("Failed to get current freq with "
 			"error 0x%lx, idx 0x%x\n", retval, *value);
 
 	return (int)retval;
@@ -90,7 +88,7 @@ extract_clock (
 {
 	unsigned long i;
 
-	dprintk("extract_clock\n");
+	pr_debug("extract_clock\n");
 
 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		if (value == data->acpi_data.states[i].status)
@@ -110,7 +108,7 @@ processor_get_freq (
 	cpumask_t		saved_mask;
 	unsigned long 		clock_freq;
 
-	dprintk("processor_get_freq\n");
+	pr_debug("processor_get_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -148,7 +146,7 @@ processor_set_freq (
 	cpumask_t		saved_mask;
 	int			retval;
 
-	dprintk("processor_set_freq\n");
+	pr_debug("processor_set_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -159,16 +157,16 @@ processor_set_freq (
 
 	if (state == data->acpi_data.state) {
 		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n", state);
+			pr_debug("Called after resume, resetting to P%d\n", state);
 			data->resume = 0;
 		} else {
-			dprintk("Already at target state (P%d)\n", state);
+			pr_debug("Already at target state (P%d)\n", state);
 			retval = 0;
 			goto migrate_end;
 		}
 	}
 
-	dprintk("Transitioning from P%d to P%d\n",
+	pr_debug("Transitioning from P%d to P%d\n",
 		data->acpi_data.state, state);
 
 	/* cpufreq frequency struct */
@@ -186,7 +184,7 @@ processor_set_freq (
 
 	value = (u32) data->acpi_data.states[state].control;
 
-	dprintk("Transitioning to state: 0x%08x\n", value);
+	pr_debug("Transitioning to state: 0x%08x\n", value);
 
 	ret = processor_set_pstate(value);
 	if (ret) {
@@ -219,7 +217,7 @@ acpi_cpufreq_get (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
 
-	dprintk("acpi_cpufreq_get\n");
+	pr_debug("acpi_cpufreq_get\n");
 
 	return processor_get_freq(data, cpu);
 }
@@ -235,7 +233,7 @@ acpi_cpufreq_target (
 	unsigned int next_state = 0;
 	unsigned int result = 0;
 
-	dprintk("acpi_cpufreq_setpolicy\n");
+	pr_debug("acpi_cpufreq_setpolicy\n");
 
 	result = cpufreq_frequency_table_target(policy,
 			data->freq_table, target_freq, relation, &next_state);
@@ -255,7 +253,7 @@ acpi_cpufreq_verify (
 	unsigned int result = 0;
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_verify\n");
+	pr_debug("acpi_cpufreq_verify\n");
 
 	result = cpufreq_frequency_table_verify(policy,
 			data->freq_table);
@@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init (
 	struct cpufreq_acpi_io	*data;
 	unsigned int		result = 0;
 
-	dprintk("acpi_cpufreq_cpu_init\n");
+	pr_debug("acpi_cpufreq_cpu_init\n");
 
 	data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
 	if (!data)
@@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init (
 
 	/* capability check */
 	if (data->acpi_data.state_count <= 1) {
-		dprintk("No P-States\n");
+		pr_debug("No P-States\n");
 		result = -ENODEV;
 		goto err_unreg;
 	}
@@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init (
 					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
 	    (data->acpi_data.status_register.space_id !=
 					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Unsupported address space [%d, %d]\n",
+		pr_debug("Unsupported address space [%d, %d]\n",
 			(u32) (data->acpi_data.control_register.space_id),
 			(u32) (data->acpi_data.status_register.space_id));
 		result = -ENODEV;
@@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init (
 	       "activated.\n", cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
 			(i == data->acpi_data.state?'*':' '), i,
 			(u32) data->acpi_data.states[i].core_frequency,
 			(u32) data->acpi_data.states[i].power,
@@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_cpu_exit\n");
+	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
@@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
 static int __init
 acpi_cpufreq_init (void)
 {
-	dprintk("acpi_cpufreq_init\n");
+	pr_debug("acpi_cpufreq_init\n");
 
  	return cpufreq_register_driver(&acpi_cpufreq_driver);
 }
@@ -427,7 +425,7 @@ acpi_cpufreq_init (void)
 static void __exit
 acpi_cpufreq_exit (void)
 {
-	dprintk("acpi_cpufreq_exit\n");
+	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 	return;
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 1b811c61bdc6..f64097b5118a 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = {
         .rating         = 300,
         .read           = read_cyclone,
         .mask           = (1LL << 40) - 1,
-        .mult           = 0, /*to be calculated*/
-        .shift          = 16,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -118,9 +116,7 @@ int __init init_cyclone_clock(void)
 	/* initialize last tick */
 	cyclone_mc = cyclone_timer;
 	clocksource_cyclone.fsys_mmio = cyclone_timer;
-	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
-						clocksource_cyclone.shift);
-	clocksource_register(&clocksource_cyclone);
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
 
 	return 0;
 }
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5b704740f160..782c3a357f24 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -31,6 +31,7 @@
 #include <linux/irq.h>
 #include <linux/ratelimit.h>
 #include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			smp_local_flush_tlb();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 156ad803d5b7..04440cc09b40 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -73,8 +73,6 @@ static struct clocksource clocksource_itc = {
 	.rating         = 350,
 	.read           = itc_get_cycles,
 	.mask           = CLOCKSOURCE_MASK(64),
-	.mult           = 0, /*to be calculated*/
-	.shift          = 16,
 	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 #ifdef CONFIG_PARAVIRT
 	.resume		= paravirt_clocksource_resume,
@@ -365,11 +363,8 @@ ia64_init_itm (void)
 	ia64_cpu_local_tick();
 
 	if (!itc_clocksource) {
-		/* Sort out mult/shift values: */
-		clocksource_itc.mult =
-			clocksource_hz2mult(local_cpu_data->itc_freq,
-						clocksource_itc.shift);
-		clocksource_register(&clocksource_itc);
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
 		itc_clocksource = &clocksource_itc;
 	}
 }
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 787de4a77d82..53c0ba004e9e 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -209,6 +209,7 @@ SECTIONS {
 	data : {
 	} :data
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
 		INIT_TASK_DATA(PAGE_SIZE)
 		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
 		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 0799fea4c588..20b359376128 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/prefetch.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index 5cdd7e4a597c..f7b798993cea 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -29,7 +29,7 @@ typedef struct
 	unsigned int depth;
 	struct pt_regs *regs;
 	struct unw_frame_info frame;
-	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
+	unsigned long *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
 /* Returns non-zero if the PC is in the Interrupt Vector Table */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index 21d6f09e3447..c34efda122e1 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -33,8 +33,6 @@ static struct clocksource clocksource_sn2 = {
         .rating         = 450,
         .read           = read_sn2,
         .mask           = (1LL << 55) - 1,
-        .mult           = 0,
-        .shift          = 10,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -57,9 +55,7 @@ ia64_sn_udelay (unsigned long usecs)
 void __init sn_timer_init(void)
 {
 	clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR;
-	clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-							clocksource_sn2.shift);
-	clocksource_register(&clocksource_sn2);
+	clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second);
 
 	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
index 108bb858acf2..b279e142c633 100644
--- a/arch/ia64/xen/irq_xen.c
+++ b/arch/ia64/xen/irq_xen.c
@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
 static int xen_slab_ready;
 
 #ifdef CONFIG_SMP
+#include <linux/sched.h>
+
 /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
  * it ends up to issue several memory accesses upon percpu data and
  * thus adds unnecessary traffic to other paths.
@@ -99,7 +101,13 @@ static int xen_slab_ready;
 static irqreturn_t
 xen_dummy_handler(int irq, void *dev_id)
 {
+	return IRQ_HANDLED;
+}
 
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
 };
 
 static struct irqaction xen_resched_irqaction = {
-	.handler =	xen_dummy_handler,
+	.handler =	xen_resched_handler,
 	.flags =	IRQF_DISABLED,
 	.name =		"resched"
 };
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 31cef20b2996..fc10b39893d4 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
  *
  * Description:  This routine executes on CPU which received
  *               'RESCHEDULE_IPI'.
- *               Rescheduling is processed at the exit of interrupt
- *               operation.
  *
  * Born on Date: 2002.02.05
  *
@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
  *==========================================================================*/
 void smp_reschedule_interrupt(void)
 {
-	/* nothing to do */
+	scheduler_ipi();
 }
 
 /*==========================================================================*
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index c194d64cdbb9..cf95aec77460 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -44,6 +44,7 @@ SECTIONS
   EXCEPTION_TABLE(16)
   NOTES
 
+  _sdata = .;			/* Start of data section */
   RODATA
   RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
   _edata = .;			/* End of data section */
diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index b995513d527f..95022b04b62d 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -36,13 +36,10 @@
 
 /* Hook for MIDI serial driver */
 void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
 void (*atari_input_mouse_interrupt_hook) (char *);
-EXPORT_SYMBOL(atari_mouse_interrupt_hook);
 EXPORT_SYMBOL(atari_input_keyboard_interrupt_hook);
 EXPORT_SYMBOL(atari_input_mouse_interrupt_hook);
 
@@ -263,8 +260,8 @@ repeat:
 			kb_state.buf[kb_state.len++] = scancode;
 			if (kb_state.len == 3) {
 				kb_state.state = KEYBOARD;
-				if (atari_mouse_interrupt_hook)
-					atari_mouse_interrupt_hook(kb_state.buf);
+				if (atari_input_mouse_interrupt_hook)
+					atari_input_mouse_interrupt_hook(kb_state.buf);
 			}
 			break;
 
@@ -575,7 +572,7 @@ int atari_keyb_init(void)
 	kb_state.len = 0;
 
 	error = request_irq(IRQ_MFP_ACIA, atari_keyboard_interrupt,
-			    IRQ_TYPE_SLOW, "keyboard/mouse/MIDI",
+			    IRQ_TYPE_SLOW, "keyboard,mouse,MIDI",
 			    atari_keyboard_interrupt);
 	if (error)
 		return error;
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index 604329fafbb8..ddbf43ca8858 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -180,7 +180,7 @@ void __init stdma_init(void)
 {
 	stdma_isr = NULL;
 	if (request_irq(IRQ_MFP_FDC, stdma_int, IRQ_TYPE_SLOW | IRQF_SHARED,
-			"ST-DMA: floppy/ACSI/IDE/Falcon-SCSI", stdma_int))
+			"ST-DMA floppy,ACSI,IDE,Falcon-SCSI", stdma_int))
 		pr_err("Couldn't register ST-DMA interrupt\n");
 }
 
diff --git a/arch/m68k/include/asm/atarikb.h b/arch/m68k/include/asm/atarikb.h
index 546e7da5804f..68f3622bf591 100644
--- a/arch/m68k/include/asm/atarikb.h
+++ b/arch/m68k/include/asm/atarikb.h
@@ -34,8 +34,6 @@ void ikbd_joystick_disable(void);
 
 /* Hook for MIDI serial driver */
 extern void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-extern void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 extern void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h
index 9d69f6e62365..e9020f88a748 100644
--- a/arch/m68k/include/asm/bitops_mm.h
+++ b/arch/m68k/include/asm/bitops_mm.h
@@ -181,14 +181,15 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = ~*p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -196,7 +197,8 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
@@ -215,27 +217,32 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
 		/* Look for zero in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
-	res = find_first_zero_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_zero_bit(p, size - offset);
 }
 
 static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = *p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -243,7 +250,8 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_bit(const unsigned long *vaddr, int size,
@@ -262,13 +270,17 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
 		/* Look for one in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No one yet, search remaining full bytes for a one */
-	res = find_first_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_bit(p, size - offset);
 }
 
 /*
@@ -366,23 +378,25 @@ static inline int test_bit_le(int nr, const void *vaddr)
 static inline int find_first_zero_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
-	while (*p++ == ~0UL)
-	{
-		if (--size == 0)
-			return (p - addr) << 5;
+	words = (size >> 5) + ((size & 31) > 0);
+	while (*p++ == ~0UL) {
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (!test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_zero_bit_le(const void *addr,
@@ -400,10 +414,15 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for zero in first longword */
 		for (res = bit; res < 32; res++)
-			if (!test_bit_le(res, p))
-				return offset + res;
+			if (!test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
 	return offset + find_first_zero_bit_le(p, size - offset);
@@ -412,22 +431,25 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 static inline int find_first_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
+	words = (size >> 5) + ((size & 31) > 0);
 	while (*p++ == 0UL) {
-		if (--size == 0)
-			return (p - addr) << 5;
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_bit_le(const void *addr,
@@ -445,10 +467,15 @@ static inline unsigned long find_next_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for one in first longword */
 		for (res = bit; res < 32; res++)
-			if (test_bit_le(res, p))
-				return offset + res;
+			if (test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No set bit yet, search remaining full bytes for a set bit */
 	return offset + find_first_bit_le(p, size - offset);
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 26d851d385bb..f3b649de2a1b 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -22,7 +22,7 @@
 #define __NR_mknod		 14
 #define __NR_chmod		 15
 #define __NR_chown		 16
-#define __NR_break		 17
+/*#define __NR_break		 17*/
 #define __NR_oldstat		 18
 #define __NR_lseek		 19
 #define __NR_getpid		 20
@@ -36,11 +36,11 @@
 #define __NR_oldfstat		 28
 #define __NR_pause		 29
 #define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
+/*#define __NR_stty		 31*/
+/*#define __NR_gtty		 32*/
 #define __NR_access		 33
 #define __NR_nice		 34
-#define __NR_ftime		 35
+/*#define __NR_ftime		 35*/
 #define __NR_sync		 36
 #define __NR_kill		 37
 #define __NR_rename		 38
@@ -49,7 +49,7 @@
 #define __NR_dup		 41
 #define __NR_pipe		 42
 #define __NR_times		 43
-#define __NR_prof		 44
+/*#define __NR_prof		 44*/
 #define __NR_brk		 45
 #define __NR_setgid		 46
 #define __NR_getgid		 47
@@ -58,13 +58,13 @@
 #define __NR_getegid		 50
 #define __NR_acct		 51
 #define __NR_umount2		 52
-#define __NR_lock		 53
+/*#define __NR_lock		 53*/
 #define __NR_ioctl		 54
 #define __NR_fcntl		 55
-#define __NR_mpx		 56
+/*#define __NR_mpx		 56*/
 #define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
+/*#define __NR_ulimit		 58*/
+/*#define __NR_oldolduname	 59*/
 #define __NR_umask		 60
 #define __NR_chroot		 61
 #define __NR_ustat		 62
@@ -103,10 +103,10 @@
 #define __NR_fchown		 95
 #define __NR_getpriority	 96
 #define __NR_setpriority	 97
-#define __NR_profil		 98
+/*#define __NR_profil		 98*/
 #define __NR_statfs		 99
 #define __NR_fstatfs		100
-#define __NR_ioperm		101
+/*#define __NR_ioperm		101*/
 #define __NR_socketcall		102
 #define __NR_syslog		103
 #define __NR_setitimer		104
@@ -114,11 +114,11 @@
 #define __NR_stat		106
 #define __NR_lstat		107
 #define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		/* 110 */ not supported
+/*#define __NR_olduname		109*/
+/*#define __NR_iopl		110*/ /* not supported */
 #define __NR_vhangup		111
-#define __NR_idle		/* 112 */ Obsolete
-#define __NR_vm86		/* 113 */ not supported
+/*#define __NR_idle		112*/ /* Obsolete */
+/*#define __NR_vm86		113*/ /* not supported */
 #define __NR_wait4		114
 #define __NR_swapoff		115
 #define __NR_sysinfo		116
@@ -132,17 +132,17 @@
 #define __NR_adjtimex		124
 #define __NR_mprotect		125
 #define __NR_sigprocmask	126
-#define __NR_create_module	127
+/*#define __NR_create_module	127*/
 #define __NR_init_module	128
 #define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
+/*#define __NR_get_kernel_syms	130*/
 #define __NR_quotactl		131
 #define __NR_getpgid		132
 #define __NR_fchdir		133
 #define __NR_bdflush		134
 #define __NR_sysfs		135
 #define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+/*#define __NR_afs_syscall	137*/ /* Syscall for Andrew File System */
 #define __NR_setfsuid		138
 #define __NR_setfsgid		139
 #define __NR__llseek		140
@@ -172,7 +172,7 @@
 #define __NR_setresuid		164
 #define __NR_getresuid		165
 #define __NR_getpagesize	166
-#define __NR_query_module	167
+/*#define __NR_query_module	167*/
 #define __NR_poll		168
 #define __NR_nfsservctl		169
 #define __NR_setresgid		170
@@ -193,8 +193,8 @@
 #define __NR_capset		185
 #define __NR_sigaltstack	186
 #define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
+/*#define __NR_getpmsg		188*/	/* some people actually want streams */
+/*#define __NR_putpmsg		189*/	/* some people actually want streams */
 #define __NR_vfork		190
 #define __NR_ugetrlimit		191
 #define __NR_mmap2		192
@@ -223,6 +223,8 @@
 #define __NR_setfsuid32		215
 #define __NR_setfsgid32		216
 #define __NR_pivot_root		217
+/* 218*/
+/* 219*/
 #define __NR_getdents64		220
 #define __NR_gettid		221
 #define __NR_tkill		222
@@ -281,7 +283,7 @@
 #define __NR_mq_notify		275
 #define __NR_mq_getsetattr	276
 #define __NR_waitid		277
-#define __NR_vserver		278
+/*#define __NR_vserver		278*/
 #define __NR_add_key		279
 #define __NR_request_key	280
 #define __NR_keyctl		281
@@ -343,10 +345,14 @@
 #define __NR_fanotify_init	337
 #define __NR_fanotify_mark	338
 #define __NR_prlimit64		339
+#define __NR_name_to_handle_at	340
+#define __NR_open_by_handle_at	341
+#define __NR_clock_adjtime	342
+#define __NR_syncfs		343
 
 #ifdef __KERNEL__
 
-#define NR_syscalls		340
+#define NR_syscalls		344
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm
index 55d5d6b680a2..aced67804579 100644
--- a/arch/m68k/kernel/Makefile_mm
+++ b/arch/m68k/kernel/Makefile_mm
@@ -10,7 +10,7 @@ endif
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o syscalltable.o
 
 devres-y = ../../../kernel/irq/devres.o
 
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1559dea36e55..bd0ec05263b2 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -407,347 +407,3 @@ resume:
 
 	rts
 
-.data
-ALIGN
-sys_call_table:
-	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
-	.long sys_exit
-	.long sys_fork
-	.long sys_read
-	.long sys_write
-	.long sys_open		/* 5 */
-	.long sys_close
-	.long sys_waitpid
-	.long sys_creat
-	.long sys_link
-	.long sys_unlink	/* 10 */
-	.long sys_execve
-	.long sys_chdir
-	.long sys_time
-	.long sys_mknod
-	.long sys_chmod		/* 15 */
-	.long sys_chown16
-	.long sys_ni_syscall				/* old break syscall holder */
-	.long sys_stat
-	.long sys_lseek
-	.long sys_getpid	/* 20 */
-	.long sys_mount
-	.long sys_oldumount
-	.long sys_setuid16
-	.long sys_getuid16
-	.long sys_stime		/* 25 */
-	.long sys_ptrace
-	.long sys_alarm
-	.long sys_fstat
-	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall				/* old stty syscall holder */
-	.long sys_ni_syscall				/* old gtty syscall holder */
-	.long sys_access
-	.long sys_nice
-	.long sys_ni_syscall	/* 35 */	/* old ftime syscall holder */
-	.long sys_sync
-	.long sys_kill
-	.long sys_rename
-	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
-	.long sys_dup
-	.long sys_pipe
-	.long sys_times
-	.long sys_ni_syscall				/* old prof syscall holder */
-	.long sys_brk		/* 45 */
-	.long sys_setgid16
-	.long sys_getgid16
-	.long sys_signal
-	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
-	.long sys_acct
-	.long sys_umount				/* recycled never used phys() */
-	.long sys_ni_syscall				/* old lock syscall holder */
-	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall				/* old mpx syscall holder */
-	.long sys_setpgid
-	.long sys_ni_syscall				/* old ulimit syscall holder */
-	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
-	.long sys_chroot
-	.long sys_ustat
-	.long sys_dup2
-	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
-	.long sys_setsid
-	.long sys_sigaction
-	.long sys_sgetmask
-	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
-	.long sys_setregid16
-	.long sys_sigsuspend
-	.long sys_sigpending
-	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
-	.long sys_old_getrlimit
-	.long sys_getrusage
-	.long sys_gettimeofday
-	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
-	.long sys_setgroups16
-	.long sys_old_select
-	.long sys_symlink
-	.long sys_lstat
-	.long sys_readlink	/* 85 */
-	.long sys_uselib
-	.long sys_swapon
-	.long sys_reboot
-	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
-	.long sys_munmap
-	.long sys_truncate
-	.long sys_ftruncate
-	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
-	.long sys_getpriority
-	.long sys_setpriority
-	.long sys_ni_syscall				/* old profil syscall holder */
-	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall				/* ioperm for i386 */
-	.long sys_socketcall
-	.long sys_syslog
-	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
-	.long sys_newstat
-	.long sys_newlstat
-	.long sys_newfstat
-	.long sys_ni_syscall
-	.long sys_ni_syscall	/* 110 */	/* iopl for i386 */
-	.long sys_vhangup
-	.long sys_ni_syscall				/* obsolete idle() syscall */
-	.long sys_ni_syscall				/* vm86old for i386 */
-	.long sys_wait4
-	.long sys_swapoff	/* 115 */
-	.long sys_sysinfo
-	.long sys_ipc
-	.long sys_fsync
-	.long sys_sigreturn
-	.long sys_clone		/* 120 */
-	.long sys_setdomainname
-	.long sys_newuname
-	.long sys_cacheflush				/* modify_ldt for i386 */
-	.long sys_adjtimex
-	.long sys_mprotect	/* 125 */
-	.long sys_sigprocmask
-	.long sys_ni_syscall		/* old "create_module" */
-	.long sys_init_module
-	.long sys_delete_module
-	.long sys_ni_syscall	/* 130 - old "get_kernel_syms" */
-	.long sys_quotactl
-	.long sys_getpgid
-	.long sys_fchdir
-	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
-	.long sys_personality
-	.long sys_ni_syscall				/* for afs_syscall */
-	.long sys_setfsuid16
-	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
-	.long sys_getdents
-	.long sys_select
-	.long sys_flock
-	.long sys_msync
-	.long sys_readv		/* 145 */
-	.long sys_writev
-	.long sys_getsid
-	.long sys_fdatasync
-	.long sys_sysctl
-	.long sys_mlock		/* 150 */
-	.long sys_munlock
-	.long sys_mlockall
-	.long sys_munlockall
-	.long sys_sched_setparam
-	.long sys_sched_getparam	/* 155 */
-	.long sys_sched_setscheduler
-	.long sys_sched_getscheduler
-	.long sys_sched_yield
-	.long sys_sched_get_priority_max
-	.long sys_sched_get_priority_min  /* 160 */
-	.long sys_sched_rr_get_interval
-	.long sys_nanosleep
-	.long sys_mremap
-	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize
-	.long sys_ni_syscall		/* old sys_query_module */
-	.long sys_poll
-	.long sys_nfsservctl
-	.long sys_setresgid16	/* 170 */
-	.long sys_getresgid16
-	.long sys_prctl
-	.long sys_rt_sigreturn
-	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask	/* 175 */
-	.long sys_rt_sigpending
-	.long sys_rt_sigtimedwait
-	.long sys_rt_sigqueueinfo
-	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
-	.long sys_pwrite64
-	.long sys_lchown16;
-	.long sys_getcwd
-	.long sys_capget
-	.long sys_capset	/* 185 */
-	.long sys_sigaltstack
-	.long sys_sendfile
-	.long sys_ni_syscall				/* streams1 */
-	.long sys_ni_syscall				/* streams2 */
-	.long sys_vfork		/* 190 */
-	.long sys_getrlimit
-	.long sys_mmap2
-	.long sys_truncate64
-	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
-	.long sys_lstat64
-	.long sys_fstat64
-	.long sys_chown
-	.long sys_getuid
-	.long sys_getgid	/* 200 */
-	.long sys_geteuid
-	.long sys_getegid
-	.long sys_setreuid
-	.long sys_setregid
-	.long sys_getgroups	/* 205 */
-	.long sys_setgroups
-	.long sys_fchown
-	.long sys_setresuid
-	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
-	.long sys_getresgid
-	.long sys_lchown
-	.long sys_setuid
-	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
-	.long sys_setfsgid
-	.long sys_pivot_root
-	.long sys_ni_syscall
-	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
-	.long sys_gettid
-	.long sys_tkill
-	.long sys_setxattr
-	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
-	.long sys_getxattr
-	.long sys_lgetxattr
-	.long sys_fgetxattr
-	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
-	.long sys_flistxattr
-	.long sys_removexattr
-	.long sys_lremovexattr
-	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
-	.long sys_sendfile64
-	.long sys_mincore
-	.long sys_madvise
-	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
-	.long sys_io_setup
-	.long sys_io_destroy
-	.long sys_io_getevents
-	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
-	.long sys_fadvise64
-	.long sys_exit_group
-	.long sys_lookup_dcookie
-	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
-	.long sys_epoll_wait
-	.long sys_remap_file_pages
-	.long sys_set_tid_address
-	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
-	.long sys_timer_gettime
-	.long sys_timer_getoverrun
-	.long sys_timer_delete
-	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
-	.long sys_clock_getres
-	.long sys_clock_nanosleep
-	.long sys_statfs64
-	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
-	.long sys_utimes
-	.long sys_fadvise64_64
-	.long sys_mbind
-	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
-	.long sys_mq_open
-	.long sys_mq_unlink
-	.long sys_mq_timedsend
-	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
-	.long sys_mq_getsetattr
-	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
-	.long sys_add_key
-	.long sys_request_key	/* 280 */
-	.long sys_keyctl
-	.long sys_ioprio_set
-	.long sys_ioprio_get
-	.long sys_inotify_init
-	.long sys_inotify_add_watch	/* 285 */
-	.long sys_inotify_rm_watch
-	.long sys_migrate_pages
-	.long sys_openat
-	.long sys_mkdirat
-	.long sys_mknodat		/* 290 */
-	.long sys_fchownat
-	.long sys_futimesat
-	.long sys_fstatat64
-	.long sys_unlinkat
-	.long sys_renameat		/* 295 */
-	.long sys_linkat
-	.long sys_symlinkat
-	.long sys_readlinkat
-	.long sys_fchmodat
-	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
-	.long sys_unshare
-	.long sys_set_robust_list
-	.long sys_get_robust_list	/* 305 */
-	.long sys_splice
-	.long sys_sync_file_range
-	.long sys_tee
-	.long sys_vmsplice
-	.long sys_move_pages		/* 310 */
-	.long sys_sched_setaffinity
-	.long sys_sched_getaffinity
-	.long sys_kexec_load
-	.long sys_getcpu
-	.long sys_epoll_pwait		/* 315 */
-	.long sys_utimensat
-	.long sys_signalfd
-	.long sys_timerfd_create
-	.long sys_eventfd
-	.long sys_fallocate		/* 320 */
-	.long sys_timerfd_settime
-	.long sys_timerfd_gettime
-	.long sys_signalfd4
-	.long sys_eventfd2
-	.long sys_epoll_create1		/* 325 */
-	.long sys_dup3
-	.long sys_pipe2
-	.long sys_inotify_init1
-	.long sys_preadv
-	.long sys_pwritev		/* 330 */
-	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_event_open
-	.long sys_get_thread_area
-	.long sys_set_thread_area
-	.long sys_atomic_cmpxchg_32	/* 335 */
-	.long sys_atomic_barrier
-	.long sys_fanotify_init
-	.long sys_fanotify_mark
-	.long sys_prlimit64
-
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 79b1ed198c07..5909e392cb1e 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/m68knommu/kernel/syscalltable.S
- *
  *  Copyright (C) 2002, Greg Ungerer (gerg@snapgear.com)
  *
  *  Based on older entry.S files, the following copyrights apply:
@@ -9,171 +7,176 @@
  *                      Kenneth Albanowski <kjahds@kjahds.com>,
  *  Copyright (C) 2000  Lineo Inc. (www.lineo.com) 
  *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Linux/m68k support by Hamish Macdonald
  */
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
-#include <asm/unistd.h>
 
-.text
+#ifndef CONFIG_MMU
+#define sys_mmap2		sys_mmap_pgoff
+#endif
+
+.section .rodata
 ALIGN
 ENTRY(sys_call_table)
-	.long sys_restart_syscall	/* 0  -  old "setup()" system call */
+	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
 	.long sys_exit
 	.long sys_fork
 	.long sys_read
 	.long sys_write
-	.long sys_open		/* 5 */
+	.long sys_open			/* 5 */
 	.long sys_close
 	.long sys_waitpid
 	.long sys_creat
 	.long sys_link
-	.long sys_unlink	/* 10 */
+	.long sys_unlink		/* 10 */
 	.long sys_execve
 	.long sys_chdir
 	.long sys_time
 	.long sys_mknod
-	.long sys_chmod		/* 15 */
+	.long sys_chmod			/* 15 */
 	.long sys_chown16
-	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_ni_syscall		/* old break syscall holder */
 	.long sys_stat
 	.long sys_lseek
-	.long sys_getpid	/* 20 */
+	.long sys_getpid		/* 20 */
 	.long sys_mount
 	.long sys_oldumount
 	.long sys_setuid16
 	.long sys_getuid16
-	.long sys_stime		/* 25 */
+	.long sys_stime			/* 25 */
 	.long sys_ptrace
 	.long sys_alarm
 	.long sys_fstat
 	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall	/* old stty syscall holder */
-	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_utime			/* 30 */
+	.long sys_ni_syscall		/* old stty syscall holder */
+	.long sys_ni_syscall		/* old gtty syscall holder */
 	.long sys_access
 	.long sys_nice
-	.long sys_ni_syscall	/* 35 */ /* old ftime syscall holder */
+	.long sys_ni_syscall		/* 35 - old ftime syscall holder */
 	.long sys_sync
 	.long sys_kill
 	.long sys_rename
 	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
+	.long sys_rmdir			/* 40 */
 	.long sys_dup
 	.long sys_pipe
 	.long sys_times
-	.long sys_ni_syscall	/* old prof syscall holder */
-	.long sys_brk		/* 45 */
+	.long sys_ni_syscall		/* old prof syscall holder */
+	.long sys_brk			/* 45 */
 	.long sys_setgid16
 	.long sys_getgid16
 	.long sys_signal
 	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
+	.long sys_getegid16		/* 50 */
 	.long sys_acct
-	.long sys_umount	/* recycled never used phys() */
-	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_umount		/* recycled never used phys() */
+	.long sys_ni_syscall		/* old lock syscall holder */
 	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_fcntl			/* 55 */
+	.long sys_ni_syscall		/* old mpx syscall holder */
 	.long sys_setpgid
-	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall		/* old ulimit syscall holder */
 	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
+	.long sys_umask			/* 60 */
 	.long sys_chroot
 	.long sys_ustat
 	.long sys_dup2
 	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
+	.long sys_getpgrp		/* 65 */
 	.long sys_setsid
 	.long sys_sigaction
 	.long sys_sgetmask
 	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
+	.long sys_setreuid16		/* 70 */
 	.long sys_setregid16
 	.long sys_sigsuspend
 	.long sys_sigpending
 	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
+	.long sys_setrlimit		/* 75 */
 	.long sys_old_getrlimit
 	.long sys_getrusage
 	.long sys_gettimeofday
 	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
+	.long sys_getgroups16		/* 80 */
 	.long sys_setgroups16
 	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
-	.long sys_readlink	/* 85 */
+	.long sys_readlink		/* 85 */
 	.long sys_uselib
-	.long sys_ni_syscall	/* sys_swapon */
+	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
+	.long sys_old_mmap		/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
 	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
+	.long sys_fchown16		/* 95 */
 	.long sys_getpriority
 	.long sys_setpriority
-	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_ni_syscall		/* old profil syscall holder */
 	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall	/* ioperm for i386 */
+	.long sys_fstatfs		/* 100 */
+	.long sys_ni_syscall		/* ioperm for i386 */
 	.long sys_socketcall
 	.long sys_syslog
 	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
+	.long sys_getitimer		/* 105 */
 	.long sys_newstat
 	.long sys_newlstat
 	.long sys_newfstat
 	.long sys_ni_syscall
-	.long sys_ni_syscall	/* iopl for i386 */ /* 110 */
+	.long sys_ni_syscall		/* 110 - iopl for i386 */
 	.long sys_vhangup
-	.long sys_ni_syscall	/* obsolete idle() syscall */
-	.long sys_ni_syscall	/* vm86old for i386 */
+	.long sys_ni_syscall		/* obsolete idle() syscall */
+	.long sys_ni_syscall		/* vm86old for i386 */
 	.long sys_wait4
-	.long sys_ni_syscall	/* 115 */ /* sys_swapoff */
+	.long sys_swapoff		/* 115 */
 	.long sys_sysinfo
 	.long sys_ipc
 	.long sys_fsync
 	.long sys_sigreturn
-	.long sys_clone		/* 120 */
+	.long sys_clone			/* 120 */
 	.long sys_setdomainname
 	.long sys_newuname
-	.long sys_cacheflush	/* modify_ldt for i386 */
+	.long sys_cacheflush		/* modify_ldt for i386 */
 	.long sys_adjtimex
-	.long sys_ni_syscall	/* 125 */ /* sys_mprotect */
+	.long sys_mprotect		/* 125 */
 	.long sys_sigprocmask
-	.long sys_ni_syscall	/* old "creat_module" */
+	.long sys_ni_syscall		/* old "create_module" */
 	.long sys_init_module
 	.long sys_delete_module
-	.long sys_ni_syscall	/* 130: old "get_kernel_syms" */
+	.long sys_ni_syscall		/* 130 - old "get_kernel_syms" */
 	.long sys_quotactl
 	.long sys_getpgid
 	.long sys_fchdir
 	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
+	.long sys_sysfs			/* 135 */
 	.long sys_personality
-	.long sys_ni_syscall	/* for afs_syscall */
+	.long sys_ni_syscall		/* for afs_syscall */
 	.long sys_setfsuid16
 	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
+	.long sys_llseek		/* 140 */
 	.long sys_getdents
 	.long sys_select
 	.long sys_flock
-	.long sys_ni_syscall	/* sys_msync */
-	.long sys_readv		/* 145 */
+	.long sys_msync
+	.long sys_readv			/* 145 */
 	.long sys_writev
 	.long sys_getsid
 	.long sys_fdatasync
 	.long sys_sysctl
-	.long sys_ni_syscall	/* 150 */ /* sys_mlock */
-	.long sys_ni_syscall	/* sys_munlock */
-	.long sys_ni_syscall	/* sys_mlockall */
-	.long sys_ni_syscall	/* sys_munlockall */
+	.long sys_mlock			/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
 	.long sys_sched_setparam
-	.long sys_sched_getparam /* 155 */
+	.long sys_sched_getparam	/* 155 */
 	.long sys_sched_setscheduler
 	.long sys_sched_getscheduler
 	.long sys_sched_yield
@@ -181,124 +184,124 @@ ENTRY(sys_call_table)
 	.long sys_sched_get_priority_min  /* 160 */
 	.long sys_sched_rr_get_interval
 	.long sys_nanosleep
-	.long sys_ni_syscall	/* sys_mremap */
+	.long sys_mremap
 	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize	/* sys_getpagesize */
-	.long sys_ni_syscall	/* old "query_module" */
+	.long sys_getresuid16		/* 165 */
+	.long sys_getpagesize
+	.long sys_ni_syscall		/* old "query_module" */
 	.long sys_poll
-	.long sys_ni_syscall	/* sys_nfsservctl */
-	.long sys_setresgid16	/* 170 */
+	.long sys_nfsservctl
+	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
 	.long sys_rt_sigreturn
 	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask /* 175 */
+	.long sys_rt_sigprocmask	/* 175 */
 	.long sys_rt_sigpending
 	.long sys_rt_sigtimedwait
 	.long sys_rt_sigqueueinfo
 	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
+	.long sys_pread64		/* 180 */
 	.long sys_pwrite64
 	.long sys_lchown16
 	.long sys_getcwd
 	.long sys_capget
-	.long sys_capset	/* 185 */
+	.long sys_capset		/* 185 */
 	.long sys_sigaltstack
 	.long sys_sendfile
-	.long sys_ni_syscall	/* streams1 */
-	.long sys_ni_syscall	/* streams2 */
-	.long sys_vfork		/* 190 */
+	.long sys_ni_syscall		/* streams1 */
+	.long sys_ni_syscall		/* streams2 */
+	.long sys_vfork			/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap_pgoff
+	.long sys_mmap2
 	.long sys_truncate64
 	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
+	.long sys_stat64		/* 195 */
 	.long sys_lstat64
 	.long sys_fstat64
 	.long sys_chown
 	.long sys_getuid
-	.long sys_getgid	/* 200 */
+	.long sys_getgid		/* 200 */
 	.long sys_geteuid
 	.long sys_getegid
 	.long sys_setreuid
 	.long sys_setregid
-	.long sys_getgroups	/* 205 */
+	.long sys_getgroups		/* 205 */
 	.long sys_setgroups
 	.long sys_fchown
 	.long sys_setresuid
 	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
+	.long sys_setresgid		/* 210 */
 	.long sys_getresgid
 	.long sys_lchown
 	.long sys_setuid
 	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
+	.long sys_setfsuid		/* 215 */
 	.long sys_setfsgid
 	.long sys_pivot_root
 	.long sys_ni_syscall
 	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
+	.long sys_getdents64		/* 220 */
 	.long sys_gettid
 	.long sys_tkill
 	.long sys_setxattr
 	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
+	.long sys_fsetxattr		/* 225 */
 	.long sys_getxattr
 	.long sys_lgetxattr
 	.long sys_fgetxattr
 	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
+	.long sys_llistxattr		/* 230 */
 	.long sys_flistxattr
 	.long sys_removexattr
 	.long sys_lremovexattr
 	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
+	.long sys_futex			/* 235 */
 	.long sys_sendfile64
-	.long sys_ni_syscall	/* sys_mincore */
-	.long sys_ni_syscall	/* sys_madvise */
+	.long sys_mincore
+	.long sys_madvise
 	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
+	.long sys_readahead		/* 240 */
 	.long sys_io_setup
 	.long sys_io_destroy
 	.long sys_io_getevents
 	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
+	.long sys_io_cancel		/* 245 */
 	.long sys_fadvise64
 	.long sys_exit_group
 	.long sys_lookup_dcookie
 	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
+	.long sys_epoll_ctl		/* 250 */
 	.long sys_epoll_wait
-	.long sys_ni_syscall	/* sys_remap_file_pages */
+	.long sys_remap_file_pages
 	.long sys_set_tid_address
 	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
+	.long sys_timer_settime		/* 255 */
 	.long sys_timer_gettime
 	.long sys_timer_getoverrun
 	.long sys_timer_delete
 	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
+	.long sys_clock_gettime		/* 260 */
 	.long sys_clock_getres
 	.long sys_clock_nanosleep
 	.long sys_statfs64
 	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
+	.long sys_tgkill		/* 265 */
 	.long sys_utimes
 	.long sys_fadvise64_64
-	.long sys_mbind	
+	.long sys_mbind
 	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
+	.long sys_set_mempolicy		/* 270 */
 	.long sys_mq_open
 	.long sys_mq_unlink
 	.long sys_mq_timedsend
 	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
+	.long sys_mq_notify		/* 275 */
 	.long sys_mq_getsetattr
 	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
+	.long sys_ni_syscall		/* for sys_vserver */
 	.long sys_add_key
-	.long sys_request_key	/* 280 */
+	.long sys_request_key		/* 280 */
 	.long sys_keyctl
 	.long sys_ioprio_set
 	.long sys_ioprio_get
@@ -319,8 +322,8 @@ ENTRY(sys_call_table)
 	.long sys_readlinkat
 	.long sys_fchmodat
 	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
+	.long sys_pselect6
+	.long sys_ppoll
 	.long sys_unshare
 	.long sys_set_robust_list
 	.long sys_get_robust_list	/* 305 */
@@ -358,8 +361,8 @@ ENTRY(sys_call_table)
 	.long sys_fanotify_init
 	.long sys_fanotify_mark
 	.long sys_prlimit64
-
-	.rept NR_syscalls-(.-sys_call_table)/4
-		.long sys_ni_syscall
-	.endr
+	.long sys_name_to_handle_at	/* 340 */
+	.long sys_open_by_handle_at
+	.long sys_clock_adjtime
+	.long sys_syncfs
 
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 878be5f38cad..d0993594f558 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -25,6 +25,8 @@ SECTIONS
 
   EXCEPTION_TABLE(16)
 
+  _sdata = .;			/* Start of data section */
+
   RODATA
 
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 1ad6b7ad2c17..8080469ee6c1 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -25,6 +25,7 @@ SECTIONS
   _etext = .;			/* End of text section */
 
   EXCEPTION_TABLE(16) :data
+  _sdata = .;			/* Start of rw data section */
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
   /* End of data goes *here* so that freeing init code works properly. */
   _edata = .;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 02b7a03e4226..8b3db1c587fc 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -300,6 +300,8 @@ void __init paging_init(void)
 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
 		free_area_init_node(i, zones_size,
 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
+		if (node_present_pages(i))
+			node_set_state(i, N_NORMAL_MEMORY);
 	}
 }
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 851b3bf6e962..eccdefe70d4e 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -6,7 +6,6 @@ config MICROBLAZE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select USB_ARCH_HAS_EHCI
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_OPROFILE
 	select HAVE_ARCH_KGDB
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index d8a214f11ac2..e5550ce4e0eb 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -217,16 +217,12 @@ static struct clocksource clocksource_microblaze = {
 	.rating		= 300,
 	.read		= microblaze_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 8, /* I can shift it */
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static int __init microblaze_clocksource_init(void)
 {
-	clocksource_microblaze.mult =
-			clocksource_hz2mult(timer_clock_freq,
-						clocksource_microblaze.shift);
-	if (clocksource_register(&clocksource_microblaze))
+	if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq))
 		panic("failed to register clocksource");
 
 	/* stop timer1 */
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 7ff9b5492041..aef6c917b45a 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -11,6 +11,7 @@ platforms += dec
 platforms += emma
 platforms += jazz
 platforms += jz4740
+platforms += lantiq
 platforms += lasat
 platforms += loongson
 platforms += mipssim
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8e256cc5dcd9..cef1a854487d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -212,6 +212,24 @@ config MACH_JZ4740
 	select HAVE_PWM
 	select HAVE_CLK
 
+config LANTIQ
+	bool "Lantiq based platforms"
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYS_HAS_CPU_MIPS32_R1
+	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_MULTITHREADING
+	select SYS_HAS_EARLY_PRINTK
+	select ARCH_REQUIRE_GPIOLIB
+	select SWAP_IO_SPACE
+	select BOOT_RAW
+	select HAVE_CLK
+	select MIPS_MACHINE
+
 config LASAT
 	bool "LASAT Networks platforms"
 	select CEVT_R4K
@@ -736,6 +754,33 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 		Hikari
 	  Say Y here for most Octeon reference boards.
 
+config NLM_XLR_BOARD
+	bool "Netlogic XLR/XLS based systems"
+	depends on EXPERIMENTAL
+	select BOOT_ELF32
+	select NLM_COMMON
+	select NLM_XLR
+	select SYS_HAS_CPU_XLR
+	select SYS_SUPPORTS_SMP
+	select HW_HAS_PCI
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select 64BIT_PHYS_ADDR
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
+	select DMA_COHERENT
+	select NR_CPUS_DEFAULT_32
+	select CEVT_R4K
+	select CSRC_R4K
+	select IRQ_CPU
+	select ZONE_DMA if 64BIT
+	select SYNC_R4K
+	select SYS_HAS_EARLY_PRINTK
+	help
+	  Support for systems based on Netlogic XLR and XLS processors.
+	  Say Y here if you have an XLR or XLS based board.
+
 endchoice
 
 source "arch/mips/alchemy/Kconfig"
@@ -743,6 +788,7 @@ source "arch/mips/ath79/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
+source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
 source "arch/mips/pmc-sierra/Kconfig"
 source "arch/mips/powertv/Kconfig"
@@ -752,6 +798,7 @@ source "arch/mips/txx9/Kconfig"
 source "arch/mips/vr41xx/Kconfig"
 source "arch/mips/cavium-octeon/Kconfig"
 source "arch/mips/loongson/Kconfig"
+source "arch/mips/netlogic/Kconfig"
 
 endmenu
 
@@ -997,9 +1044,6 @@ config IRQ_GT641XX
 config IRQ_GIC
 	bool
 
-config IRQ_CPU_OCTEON
-	bool
-
 config MIPS_BOARDS_GEN
 	bool
 
@@ -1359,8 +1403,6 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
-	select IRQ_CPU
-	select IRQ_CPU_OCTEON
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
@@ -1425,6 +1467,17 @@ config CPU_BMIPS5000
 	help
 	  Broadcom BMIPS5000 processors.
 
+config CPU_XLR
+	bool "Netlogic XLR SoC"
+	depends on SYS_HAS_CPU_XLR
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_64BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select WEAK_ORDERING
+	select WEAK_REORDERING_BEYOND_LLSC
+	select CPU_SUPPORTS_HUGEPAGES
+	help
+	  Netlogic Microsystems XLR/XLS processors.
 endchoice
 
 if CPU_LOONGSON2F
@@ -1555,6 +1608,9 @@ config SYS_HAS_CPU_BMIPS4380
 config SYS_HAS_CPU_BMIPS5000
 	bool
 
+config SYS_HAS_CPU_XLR
+	bool
+
 #
 # CPU may reorder R->R, R->W, W->R, W->W
 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
@@ -2339,6 +2395,7 @@ config MMU
 
 config I8253
 	bool
+	select CLKSRC_I8253
 	select MIPS_EXTERNAL_TIMER
 
 config ZONE_DMA32
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 53e3514ba10e..884819cd0607 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -191,6 +191,18 @@ endif
 #
 include $(srctree)/arch/mips/Kbuild.platforms
 
+#
+# NETLOGIC SOC Common (common)
+#
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/mach-netlogic
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/netlogic
+
+#
+# NETLOGIC XLR/XLS SoC, Simulator and boards
+#
+core-$(CONFIG_NLM_XLR)      		+= arch/mips/netlogic/xlr/
+load-$(CONFIG_NLM_XLR_BOARD)		+= 0xffffffff84000000
+
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/
 
diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index ca0506a8585a..3a5abb54d505 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -36,7 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 
@@ -58,7 +58,8 @@ static DEFINE_SPINLOCK(au1xxx_dbdma_spin_lock);
 /* I couldn't find a macro that did this... */
 #define ALIGN_ADDR(x, a)	((((u32)(x)) + (a-1)) & ~(a-1))
 
-static dbdma_global_t *dbdma_gptr = (dbdma_global_t *)DDMA_GLOBAL_BASE;
+static dbdma_global_t *dbdma_gptr =
+			(dbdma_global_t *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
 static int dbdma_initialized;
 
 static dbdev_tab_t dbdev_tab[] = {
@@ -299,7 +300,7 @@ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
 	if (ctp != NULL) {
 		memset(ctp, 0, sizeof(chan_tab_t));
 		ctp->chan_index = chan = i;
-		dcp = DDMA_CHANNEL_BASE;
+		dcp = KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
 		dcp += (0x0100 * chan);
 		ctp->chan_ptr = (au1x_dma_chan_t *)dcp;
 		cp = (au1x_dma_chan_t *)dcp;
@@ -958,105 +959,75 @@ u32 au1xxx_dbdma_put_dscr(u32 chanid, au1x_ddma_desc_t *dscr)
 }
 
 
-struct alchemy_dbdma_sysdev {
-	struct sys_device sysdev;
-	u32 pm_regs[NUM_DBDMA_CHANS + 1][6];
-};
+static unsigned long alchemy_dbdma_pm_data[NUM_DBDMA_CHANS + 1][6];
 
-static int alchemy_dbdma_suspend(struct sys_device *dev,
-				 pm_message_t state)
+static int alchemy_dbdma_suspend(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	sdev->pm_regs[0][0] = au_readl(addr + 0x00);
-	sdev->pm_regs[0][1] = au_readl(addr + 0x04);
-	sdev->pm_regs[0][2] = au_readl(addr + 0x08);
-	sdev->pm_regs[0][3] = au_readl(addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	alchemy_dbdma_pm_data[0][0] = __raw_readl(addr + 0x00);
+	alchemy_dbdma_pm_data[0][1] = __raw_readl(addr + 0x04);
+	alchemy_dbdma_pm_data[0][2] = __raw_readl(addr + 0x08);
+	alchemy_dbdma_pm_data[0][3] = __raw_readl(addr + 0x0c);
 
 	/* save channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		sdev->pm_regs[i][0] = au_readl(addr + 0x00);
-		sdev->pm_regs[i][1] = au_readl(addr + 0x04);
-		sdev->pm_regs[i][2] = au_readl(addr + 0x08);
-		sdev->pm_regs[i][3] = au_readl(addr + 0x0c);
-		sdev->pm_regs[i][4] = au_readl(addr + 0x10);
-		sdev->pm_regs[i][5] = au_readl(addr + 0x14);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		alchemy_dbdma_pm_data[i][0] = __raw_readl(addr + 0x00);
+		alchemy_dbdma_pm_data[i][1] = __raw_readl(addr + 0x04);
+		alchemy_dbdma_pm_data[i][2] = __raw_readl(addr + 0x08);
+		alchemy_dbdma_pm_data[i][3] = __raw_readl(addr + 0x0c);
+		alchemy_dbdma_pm_data[i][4] = __raw_readl(addr + 0x10);
+		alchemy_dbdma_pm_data[i][5] = __raw_readl(addr + 0x14);
 
 		/* halt channel */
-		au_writel(sdev->pm_regs[i][0] & ~1, addr + 0x00);
-		au_sync();
-		while (!(au_readl(addr + 0x14) & 1))
-			au_sync();
+		__raw_writel(alchemy_dbdma_pm_data[i][0] & ~1, addr + 0x00);
+		wmb();
+		while (!(__raw_readl(addr + 0x14) & 1))
+			wmb();
 
 		addr += 0x100;	/* next channel base */
 	}
 	/* disable channel interrupts */
-	au_writel(0, DDMA_GLOBAL_BASE + 0x0c);
-	au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(0, addr + 0x0c);
+	wmb();
 
 	return 0;
 }
 
-static int alchemy_dbdma_resume(struct sys_device *dev)
+static void alchemy_dbdma_resume(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	au_writel(sdev->pm_regs[0][0], addr + 0x00);
-	au_writel(sdev->pm_regs[0][1], addr + 0x04);
-	au_writel(sdev->pm_regs[0][2], addr + 0x08);
-	au_writel(sdev->pm_regs[0][3], addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(alchemy_dbdma_pm_data[0][0], addr + 0x00);
+	__raw_writel(alchemy_dbdma_pm_data[0][1], addr + 0x04);
+	__raw_writel(alchemy_dbdma_pm_data[0][2], addr + 0x08);
+	__raw_writel(alchemy_dbdma_pm_data[0][3], addr + 0x0c);
 
 	/* restore channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		au_writel(sdev->pm_regs[i][0], addr + 0x00);
-		au_writel(sdev->pm_regs[i][1], addr + 0x04);
-		au_writel(sdev->pm_regs[i][2], addr + 0x08);
-		au_writel(sdev->pm_regs[i][3], addr + 0x0c);
-		au_writel(sdev->pm_regs[i][4], addr + 0x10);
-		au_writel(sdev->pm_regs[i][5], addr + 0x14);
-		au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		__raw_writel(alchemy_dbdma_pm_data[i][0], addr + 0x00);
+		__raw_writel(alchemy_dbdma_pm_data[i][1], addr + 0x04);
+		__raw_writel(alchemy_dbdma_pm_data[i][2], addr + 0x08);
+		__raw_writel(alchemy_dbdma_pm_data[i][3], addr + 0x0c);
+		__raw_writel(alchemy_dbdma_pm_data[i][4], addr + 0x10);
+		__raw_writel(alchemy_dbdma_pm_data[i][5], addr + 0x14);
+		wmb();
 		addr += 0x100;	/* next channel base */
 	}
-
-	return 0;
 }
 
-static struct sysdev_class alchemy_dbdma_sysdev_class = {
-	.name		= "dbdma",
+static struct syscore_ops alchemy_dbdma_syscore_ops = {
 	.suspend	= alchemy_dbdma_suspend,
 	.resume		= alchemy_dbdma_resume,
 };
 
-static int __init alchemy_dbdma_sysdev_init(void)
-{
-	struct alchemy_dbdma_sysdev *sdev;
-	int ret;
-
-	ret = sysdev_class_register(&alchemy_dbdma_sysdev_class);
-	if (ret)
-		return ret;
-
-	sdev = kzalloc(sizeof(struct alchemy_dbdma_sysdev), GFP_KERNEL);
-	if (!sdev)
-		return -ENOMEM;
-
-	sdev->sysdev.id = -1;
-	sdev->sysdev.cls = &alchemy_dbdma_sysdev_class;
-	ret = sysdev_register(&sdev->sysdev);
-	if (ret)
-		kfree(sdev);
-
-	return ret;
-}
-
 static int __init au1xxx_dbdma_init(void)
 {
 	int irq_nr, ret;
@@ -1084,11 +1055,7 @@ static int __init au1xxx_dbdma_init(void)
 	else {
 		dbdma_initialized = 1;
 		printk(KERN_INFO "Alchemy DBDMA initialized\n");
-		ret = alchemy_dbdma_sysdev_init();
-		if (ret) {
-			printk(KERN_ERR "DBDMA PM init failed\n");
-			ret = 0;
-		}
+		register_syscore_ops(&alchemy_dbdma_syscore_ops);
 	}
 
 	return ret;
diff --git a/arch/mips/alchemy/common/dma.c b/arch/mips/alchemy/common/dma.c
index d5278877891d..347980e79a89 100644
--- a/arch/mips/alchemy/common/dma.c
+++ b/arch/mips/alchemy/common/dma.c
@@ -58,6 +58,9 @@
  * returned from request_dma.
  */
 
+/* DMA Channel register block spacing */
+#define DMA_CHANNEL_LEN		0x00000100
+
 DEFINE_SPINLOCK(au1000_dma_spin_lock);
 
 struct dma_chan au1000_dma_table[NUM_AU1000_DMA_CHANNELS] = {
@@ -77,22 +80,23 @@ static const struct dma_dev {
 	unsigned int fifo_addr;
 	unsigned int dma_mode;
 } dma_dev_table[DMA_NUM_DEV] = {
-	{UART0_ADDR + UART_TX, 0},
-	{UART0_ADDR + UART_RX, 0},
-	{0, 0},
-	{0, 0},
-	{AC97C_DATA, DMA_DW16 },          /* coherent */
-	{AC97C_DATA, DMA_DR | DMA_DW16 }, /* coherent */
-	{UART3_ADDR + UART_TX, DMA_DW8 | DMA_NC},
-	{UART3_ADDR + UART_RX, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0WR, DMA_DW8 | DMA_NC},
-	{USBD_EP2WR, DMA_DW8 | DMA_NC},
-	{USBD_EP3WR, DMA_DW8 | DMA_NC},
-	{USBD_EP4RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP5RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{I2S_DATA, DMA_DW32 | DMA_NC},
-	{I2S_DATA, DMA_DR | DMA_DW32 | DMA_NC}
+	{ AU1000_UART0_PHYS_ADDR + 0x04, DMA_DW8 },		/* UART0_TX */
+	{ AU1000_UART0_PHYS_ADDR + 0x00, DMA_DW8 | DMA_DR },	/* UART0_RX */
+	{ 0, 0 },	/* DMA_REQ0 */
+	{ 0, 0 },	/* DMA_REQ1 */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 },		/* AC97 TX, coherent */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 | DMA_DR },	/* AC97 RX, coherent */
+	{ AU1000_UART3_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC },	/* UART3_TX */
+	{ AU1000_UART3_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* UART3_RX */
+	{ AU1000_USBD_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */
+	/* on Au1500, these 2 are DMA_REQ2/3 (GPIO208/209) instead! */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC},	/* I2S TX */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC | DMA_DR}, /* I2S RX */
 };
 
 int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
@@ -123,10 +127,10 @@ int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
 
 /* Device FIFO addresses and default DMA modes - 2nd bank */
 static const struct dma_dev dma_dev_table_bank2[DMA_NUM_DEV_BANK2] = {
-	{ SD0_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD0_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 },	/* coherent */
-	{ SD1_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD1_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 }	/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR },	/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR }	/* coherent */
 };
 
 void dump_au1000_dma_channel(unsigned int dmanr)
@@ -202,7 +206,7 @@ int request_au1000_dma(int dev_id, const char *dev_str,
 	}
 
 	/* fill it in */
-	chan->io = DMA_CHANNEL_BASE + i * DMA_CHANNEL_LEN;
+	chan->io = KSEG1ADDR(AU1000_DMA_PHYS_ADDR) + i * DMA_CHANNEL_LEN;
 	chan->dev_id = dev_id;
 	chan->dev_str = dev_str;
 	chan->fifo_addr = dev->fifo_addr;
diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index 55dd7c888517..8b60ba0675e2 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c
@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
@@ -39,6 +39,36 @@
 #include <asm/mach-pb1x00/pb1000.h>
 #endif
 
+/* Interrupt Controller register offsets */
+#define IC_CFG0RD	0x40
+#define IC_CFG0SET	0x40
+#define IC_CFG0CLR	0x44
+#define IC_CFG1RD	0x48
+#define IC_CFG1SET	0x48
+#define IC_CFG1CLR	0x4C
+#define IC_CFG2RD	0x50
+#define IC_CFG2SET	0x50
+#define IC_CFG2CLR	0x54
+#define IC_REQ0INT	0x54
+#define IC_SRCRD	0x58
+#define IC_SRCSET	0x58
+#define IC_SRCCLR	0x5C
+#define IC_REQ1INT	0x5C
+#define IC_ASSIGNRD	0x60
+#define IC_ASSIGNSET	0x60
+#define IC_ASSIGNCLR	0x64
+#define IC_WAKERD	0x68
+#define IC_WAKESET	0x68
+#define IC_WAKECLR	0x6C
+#define IC_MASKRD	0x70
+#define IC_MASKSET	0x70
+#define IC_MASKCLR	0x74
+#define IC_RISINGRD	0x78
+#define IC_RISINGCLR	0x78
+#define IC_FALLINGRD	0x7C
+#define IC_FALLINGCLR	0x7C
+#define IC_TESTBIT	0x80
+
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type);
 
 /* NOTE on interrupt priorities: The original writers of this code said:
@@ -221,89 +251,101 @@ struct au1xxx_irqmap au1200_irqmap[] __initdata = {
 static void au1x_ic0_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKSET);
-	au_writel(1 << bit, IC0_WAKESET);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
+	wmb();
 }
 
 static void au1x_ic1_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKSET);
-	au_writel(1 << bit, IC1_WAKESET);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
 
 /* very hacky. does the pb1000 cpld auto-disable this int?
  * nowhere in the current kernel sources is it disabled.	--mlau
  */
 #if defined(CONFIG_MIPS_PB1000)
 	if (d->irq == AU1000_GPIO15_INT)
-		au_writel(0x4000, PB1000_MDR); /* enable int */
+		__raw_writel(0x4000, (void __iomem *)PB1000_MDR); /* enable int */
 #endif
-	au_sync();
+	wmb();
 }
 
 static void au1x_ic0_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic1_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic0_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic0_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
@@ -318,13 +360,13 @@ static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
 		return -EINVAL;
 
 	local_irq_save(flags);
-	wakemsk = au_readl(SYS_WAKEMSK);
+	wakemsk = __raw_readl((void __iomem *)SYS_WAKEMSK);
 	if (on)
 		wakemsk |= 1 << bit;
 	else
 		wakemsk &= ~(1 << bit);
-	au_writel(wakemsk, SYS_WAKEMSK);
-	au_sync();
+	__raw_writel(wakemsk, (void __iomem *)SYS_WAKEMSK);
+	wmb();
 	local_irq_restore(flags);
 
 	return 0;
@@ -356,81 +398,74 @@ static struct irq_chip au1x_ic1_chip = {
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type)
 {
 	struct irq_chip *chip;
-	unsigned long icr[6];
-	unsigned int bit, ic, irq = d->irq;
+	unsigned int bit, irq = d->irq;
 	irq_flow_handler_t handler = NULL;
 	unsigned char *name = NULL;
+	void __iomem *base;
 	int ret;
 
 	if (irq >= AU1000_INTC1_INT_BASE) {
 		bit = irq - AU1000_INTC1_INT_BASE;
 		chip = &au1x_ic1_chip;
-		ic = 1;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 	} else {
 		bit = irq - AU1000_INTC0_INT_BASE;
 		chip = &au1x_ic0_chip;
-		ic = 0;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 	}
 
 	if (bit > 31)
 		return -EINVAL;
 
-	icr[0] = ic ? IC1_CFG0SET : IC0_CFG0SET;
-	icr[1] = ic ? IC1_CFG1SET : IC0_CFG1SET;
-	icr[2] = ic ? IC1_CFG2SET : IC0_CFG2SET;
-	icr[3] = ic ? IC1_CFG0CLR : IC0_CFG0CLR;
-	icr[4] = ic ? IC1_CFG1CLR : IC0_CFG1CLR;
-	icr[5] = ic ? IC1_CFG2CLR : IC0_CFG2CLR;
-
 	ret = 0;
 
 	switch (flow_type) {	/* cfgregs 2:1:0 */
 	case IRQ_TYPE_EDGE_RISING:	/* 0:0:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "riseedge";
 		break;
 	case IRQ_TYPE_EDGE_FALLING:	/* 0:1:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_edge_irq;
 		name = "falledge";
 		break;
 	case IRQ_TYPE_EDGE_BOTH:	/* 0:1:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "bothedge";
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:	/* 1:0:1 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_level_irq;
 		name = "hilevel";
 		break;
 	case IRQ_TYPE_LEVEL_LOW:	/* 1:1:0 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_level_irq;
 		name = "lowlevel";
 		break;
 	case IRQ_TYPE_NONE:		/* 0:0:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 	__irq_set_chip_handler_name_locked(d->irq, chip, handler, name);
 
-	au_sync();
+	wmb();
 
 	return ret;
 }
@@ -444,21 +479,21 @@ asmlinkage void plat_irq_dispatch(void)
 		off = MIPS_CPU_IRQ_BASE + 7;
 		goto handle;
 	} else if (pending & CAUSEF_IP2) {
-		s = IC0_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP3) {
-		s = IC0_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP4) {
-		s = IC1_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else if (pending & CAUSEF_IP5) {
-		s = IC1_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else
 		goto spurious;
 
-	s = au_readl(s);
+	s = __raw_readl((void __iomem *)s);
 	if (unlikely(!s)) {
 spurious:
 		spurious_interrupt();
@@ -469,48 +504,42 @@ handle:
 	do_IRQ(off);
 }
 
+
+static inline void ic_init(void __iomem *base)
+{
+	/* initialize interrupt controller to a safe state */
+	__raw_writel(0xffffffff, base + IC_CFG0CLR);
+	__raw_writel(0xffffffff, base + IC_CFG1CLR);
+	__raw_writel(0xffffffff, base + IC_CFG2CLR);
+	__raw_writel(0xffffffff, base + IC_MASKCLR);
+	__raw_writel(0xffffffff, base + IC_ASSIGNCLR);
+	__raw_writel(0xffffffff, base + IC_WAKECLR);
+	__raw_writel(0xffffffff, base + IC_SRCSET);
+	__raw_writel(0xffffffff, base + IC_FALLINGCLR);
+	__raw_writel(0xffffffff, base + IC_RISINGCLR);
+	__raw_writel(0x00000000, base + IC_TESTBIT);
+	wmb();
+}
+
 static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 {
 	unsigned int bit, irq_nr;
-	int i;
-
-	/*
-	 * Initialize interrupt controllers to a safe state.
-	 */
-	au_writel(0xffffffff, IC0_CFG0CLR);
-	au_writel(0xffffffff, IC0_CFG1CLR);
-	au_writel(0xffffffff, IC0_CFG2CLR);
-	au_writel(0xffffffff, IC0_MASKCLR);
-	au_writel(0xffffffff, IC0_ASSIGNCLR);
-	au_writel(0xffffffff, IC0_WAKECLR);
-	au_writel(0xffffffff, IC0_SRCSET);
-	au_writel(0xffffffff, IC0_FALLINGCLR);
-	au_writel(0xffffffff, IC0_RISINGCLR);
-	au_writel(0x00000000, IC0_TESTBIT);
-
-	au_writel(0xffffffff, IC1_CFG0CLR);
-	au_writel(0xffffffff, IC1_CFG1CLR);
-	au_writel(0xffffffff, IC1_CFG2CLR);
-	au_writel(0xffffffff, IC1_MASKCLR);
-	au_writel(0xffffffff, IC1_ASSIGNCLR);
-	au_writel(0xffffffff, IC1_WAKECLR);
-	au_writel(0xffffffff, IC1_SRCSET);
-	au_writel(0xffffffff, IC1_FALLINGCLR);
-	au_writel(0xffffffff, IC1_RISINGCLR);
-	au_writel(0x00000000, IC1_TESTBIT);
+	void __iomem *base;
 
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR));
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR));
 	mips_cpu_irq_init();
 
 	/* register all 64 possible IC0+IC1 irq sources as type "none".
 	 * Use set_irq_type() to set edge/level behaviour at runtime.
 	 */
-	for (i = AU1000_INTC0_INT_BASE;
-	     (i < AU1000_INTC0_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC0_INT_BASE;
+	     (irq_nr < AU1000_INTC0_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
-	for (i = AU1000_INTC1_INT_BASE;
-	     (i < AU1000_INTC1_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC1_INT_BASE;
+	     (irq_nr < AU1000_INTC1_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
 	/*
 	 * Initialize IC0, which is fixed per processor.
@@ -520,13 +549,13 @@ static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 
 		if (irq_nr >= AU1000_INTC1_INT_BASE) {
 			bit = irq_nr - AU1000_INTC1_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC1_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 		} else {
 			bit = irq_nr - AU1000_INTC0_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC0_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 		}
+		if (map->im_request)
+			__raw_writel(1 << bit, base + IC_ASSIGNSET);
 
 		au1x_ic_settype(irq_get_irq_data(irq_nr), map->im_type);
 		++map;
@@ -556,90 +585,62 @@ void __init arch_init_irq(void)
 	}
 }
 
-struct alchemy_ic_sysdev {
-	struct sys_device sysdev;
-	void __iomem *base;
-	unsigned long pmdata[7];
-};
 
-static int alchemy_ic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
+static unsigned long alchemy_ic_pmdata[7 * 2];
 
-	icdev->pmdata[0] = __raw_readl(icdev->base + IC_CFG0RD);
-	icdev->pmdata[1] = __raw_readl(icdev->base + IC_CFG1RD);
-	icdev->pmdata[2] = __raw_readl(icdev->base + IC_CFG2RD);
-	icdev->pmdata[3] = __raw_readl(icdev->base + IC_SRCRD);
-	icdev->pmdata[4] = __raw_readl(icdev->base + IC_ASSIGNRD);
-	icdev->pmdata[5] = __raw_readl(icdev->base + IC_WAKERD);
-	icdev->pmdata[6] = __raw_readl(icdev->base + IC_MASKRD);
-
-	return 0;
+static inline void alchemy_ic_suspend_one(void __iomem *base, unsigned long *d)
+{
+	d[0] = __raw_readl(base + IC_CFG0RD);
+	d[1] = __raw_readl(base + IC_CFG1RD);
+	d[2] = __raw_readl(base + IC_CFG2RD);
+	d[3] = __raw_readl(base + IC_SRCRD);
+	d[4] = __raw_readl(base + IC_ASSIGNRD);
+	d[5] = __raw_readl(base + IC_WAKERD);
+	d[6] = __raw_readl(base + IC_MASKRD);
+	ic_init(base);		/* shut it up too while at it */
 }
 
-static int alchemy_ic_resume(struct sys_device *dev)
+static inline void alchemy_ic_resume_one(void __iomem *base, unsigned long *d)
 {
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
-
-	__raw_writel(0xffffffff, icdev->base + IC_MASKCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG0CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG1CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG2CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_SRCCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_ASSIGNCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_WAKECLR);
-	__raw_writel(0xffffffff, icdev->base + IC_RISINGCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_FALLINGCLR);
-	__raw_writel(0x00000000, icdev->base + IC_TESTBIT);
-	wmb();
-	__raw_writel(icdev->pmdata[0], icdev->base + IC_CFG0SET);
-	__raw_writel(icdev->pmdata[1], icdev->base + IC_CFG1SET);
-	__raw_writel(icdev->pmdata[2], icdev->base + IC_CFG2SET);
-	__raw_writel(icdev->pmdata[3], icdev->base + IC_SRCSET);
-	__raw_writel(icdev->pmdata[4], icdev->base + IC_ASSIGNSET);
-	__raw_writel(icdev->pmdata[5], icdev->base + IC_WAKESET);
+	ic_init(base);
+
+	__raw_writel(d[0], base + IC_CFG0SET);
+	__raw_writel(d[1], base + IC_CFG1SET);
+	__raw_writel(d[2], base + IC_CFG2SET);
+	__raw_writel(d[3], base + IC_SRCSET);
+	__raw_writel(d[4], base + IC_ASSIGNSET);
+	__raw_writel(d[5], base + IC_WAKESET);
 	wmb();
 
-	__raw_writel(icdev->pmdata[6], icdev->base + IC_MASKSET);
+	__raw_writel(d[6], base + IC_MASKSET);
 	wmb();
+}
 
+static int alchemy_ic_suspend(void)
+{
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			       alchemy_ic_pmdata);
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			       &alchemy_ic_pmdata[7]);
 	return 0;
 }
 
-static struct sysdev_class alchemy_ic_sysdev_class = {
-	.name		= "ic",
+static void alchemy_ic_resume(void)
+{
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			      &alchemy_ic_pmdata[7]);
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			      alchemy_ic_pmdata);
+}
+
+static struct syscore_ops alchemy_ic_syscore_ops = {
 	.suspend	= alchemy_ic_suspend,
 	.resume		= alchemy_ic_resume,
 };
 
-static int __init alchemy_ic_sysdev_init(void)
+static int __init alchemy_ic_pm_init(void)
 {
-	struct alchemy_ic_sysdev *icdev;
-	unsigned long icbase[2] = { IC0_PHYS_ADDR, IC1_PHYS_ADDR };
-	int err, i;
-
-	err = sysdev_class_register(&alchemy_ic_sysdev_class);
-	if (err)
-		return err;
-
-	for (i = 0; i < 2; i++) {
-		icdev = kzalloc(sizeof(struct alchemy_ic_sysdev), GFP_KERNEL);
-		if (!icdev)
-			return -ENOMEM;
-
-		icdev->base = ioremap(icbase[i], 0x1000);
-
-		icdev->sysdev.id = i;
-		icdev->sysdev.cls = &alchemy_ic_sysdev_class;
-		err = sysdev_register(&icdev->sysdev);
-		if (err) {
-			kfree(icdev);
-			return err;
-		}
-	}
-
+	register_syscore_ops(&alchemy_ic_syscore_ops);
 	return 0;
 }
-device_initcall(alchemy_ic_sysdev_init);
+device_initcall(alchemy_ic_pm_init);
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 9e7814db3d03..3b2c18b14341 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -13,9 +13,10 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
+#include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
-#include <linux/init.h>
+#include <linux/slab.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
@@ -30,21 +31,12 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 #ifdef CONFIG_SERIAL_8250
 	switch (state) {
 	case 0:
-		if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) {
-			/* power-on sequence as suggested in the databooks */
-			__raw_writel(0, port->membase + UART_MOD_CNTRL);
-			wmb();
-			__raw_writel(1, port->membase + UART_MOD_CNTRL);
-			wmb();
-		}
-		__raw_writel(3, port->membase + UART_MOD_CNTRL); /* full on */
-		wmb();
+		alchemy_uart_enable(CPHYSADDR(port->membase));
 		serial8250_do_pm(port, state, old_state);
 		break;
 	case 3:		/* power off */
 		serial8250_do_pm(port, state, old_state);
-		__raw_writel(0, port->membase + UART_MOD_CNTRL);
-		wmb();
+		alchemy_uart_disable(CPHYSADDR(port->membase));
 		break;
 	default:
 		serial8250_do_pm(port, state, old_state);
@@ -65,38 +57,60 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 		.pm		= alchemy_8250_pm,		\
 	}
 
-static struct plat_serial8250_port au1x00_uart_data[] = {
-#if defined(CONFIG_SOC_AU1000)
-	PORT(UART0_PHYS_ADDR, AU1000_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1000_UART1_INT),
-	PORT(UART2_PHYS_ADDR, AU1000_UART2_INT),
-	PORT(UART3_PHYS_ADDR, AU1000_UART3_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	PORT(UART0_PHYS_ADDR, AU1500_UART0_INT),
-	PORT(UART3_PHYS_ADDR, AU1500_UART3_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	PORT(UART0_PHYS_ADDR, AU1100_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1100_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1100_UART3_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	PORT(UART0_PHYS_ADDR, AU1550_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1550_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1550_UART3_INT),
-#elif defined(CONFIG_SOC_AU1200)
-	PORT(UART0_PHYS_ADDR, AU1200_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1200_UART1_INT),
-#endif
-	{ },
+static struct plat_serial8250_port au1x00_uart_data[][4] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1000_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1000_UART1_INT),
+		PORT(AU1000_UART2_PHYS_ADDR, AU1000_UART2_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1000_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1500_UART0_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1500_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1100_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1100_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1100_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1550_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1550_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1550_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1200] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1200_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1200_UART1_INT),
+	},
 };
 
 static struct platform_device au1xx0_uart_device = {
 	.name			= "serial8250",
 	.id			= PLAT8250_DEV_AU1X00,
-	.dev			= {
-		.platform_data	= au1x00_uart_data,
-	},
 };
 
+static void __init alchemy_setup_uarts(int ctype)
+{
+	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
+	int s = sizeof(struct plat_serial8250_port);
+	int c = alchemy_get_uarts(ctype);
+	struct plat_serial8250_port *ports;
+
+	ports = kzalloc(s * (c + 1), GFP_KERNEL);
+	if (!ports) {
+		printk(KERN_INFO "Alchemy: no memory for UART data\n");
+		return;
+	}
+	memcpy(ports, au1x00_uart_data[ctype], s * c);
+	au1xx0_uart_device.dev.platform_data = ports;
+
+	/* Fill in uartclk for every port. */
+	for (s = 0; s < c; s++)
+		ports[s].uartclk = uartclk;
+	if (platform_device_register(&au1xx0_uart_device))
+		printk(KERN_INFO "Alchemy: failed to register UARTs\n");
+}
+
 /* OHCI (USB full speed host controller) */
 static struct resource au1xxx_usb_ohci_resources[] = {
 	[0] = {
@@ -269,8 +283,8 @@ extern struct au1xmmc_platform_data au1xmmc_platdata[2];
 
 static struct resource au1200_mmc0_resources[] = {
 	[0] = {
-		.start          = SD0_PHYS_ADDR,
-		.end            = SD0_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD0_PHYS_ADDR,
+		.end            = AU1100_SD0_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
@@ -305,8 +319,8 @@ static struct platform_device au1200_mmc0_device = {
 #ifndef CONFIG_MIPS_DB1200
 static struct resource au1200_mmc1_resources[] = {
 	[0] = {
-		.start          = SD1_PHYS_ADDR,
-		.end            = SD1_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD1_PHYS_ADDR,
+		.end            = AU1100_SD1_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
@@ -359,15 +373,16 @@ static struct platform_device pbdb_smbus_device = {
 #endif
 
 /* Macro to help defining the Ethernet MAC resources */
+#define MAC_RES_COUNT	3	/* MAC regs base, MAC enable reg, MAC INT */
 #define MAC_RES(_base, _enable, _irq)			\
 	{						\
-		.start	= CPHYSADDR(_base),		\
-		.end	= CPHYSADDR(_base + 0xffff),	\
+		.start	= _base,			\
+		.end	= _base + 0xffff,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
-		.start	= CPHYSADDR(_enable),		\
-		.end	= CPHYSADDR(_enable + 0x3),	\
+		.start	= _enable,			\
+		.end	= _enable + 0x3,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
@@ -376,19 +391,29 @@ static struct platform_device pbdb_smbus_device = {
 		.flags	= IORESOURCE_IRQ		\
 	}
 
-static struct resource au1xxx_eth0_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH0_BASE, AU1000_MAC0_ENABLE, AU1000_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	MAC_RES(AU1100_ETH0_BASE, AU1100_MAC0_ENABLE, AU1100_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH0_BASE, AU1550_MAC0_ENABLE, AU1550_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH0_BASE, AU1500_MAC0_ENABLE, AU1500_MAC0_DMA_INT),
-#endif
+static struct resource au1xxx_eth0_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1000_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC0_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR,
+			AU1500_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1100_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1550_MAC0_DMA_INT)
+	},
 };
 
-
 static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 	.phy1_search_mac0 = 1,
 };
@@ -396,20 +421,26 @@ static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 static struct platform_device au1xxx_eth0_device = {
 	.name		= "au1000-eth",
 	.id		= 0,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth0_resources),
-	.resource	= au1xxx_eth0_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth0_platform_data,
 };
 
-#ifndef CONFIG_SOC_AU1100
-static struct resource au1xxx_eth1_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH1_BASE, AU1000_MAC1_ENABLE, AU1000_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH1_BASE, AU1550_MAC1_ENABLE, AU1550_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH1_BASE, AU1500_MAC1_ENABLE, AU1500_MAC1_DMA_INT),
-#endif
+static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1000_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC1_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR + 4,
+			AU1500_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1550_MAC1_DMA_INT)
+	},
 };
 
 static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
@@ -419,11 +450,9 @@ static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
 static struct platform_device au1xxx_eth1_device = {
 	.name		= "au1000-eth",
 	.id		= 1,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth1_resources),
-	.resource	= au1xxx_eth1_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth1_platform_data,
 };
-#endif
 
 void __init au1xxx_override_eth_cfg(unsigned int port,
 			struct au1000_eth_platform_data *eth_data)
@@ -434,15 +463,65 @@ void __init au1xxx_override_eth_cfg(unsigned int port,
 	if (port == 0)
 		memcpy(&au1xxx_eth0_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#ifndef CONFIG_SOC_AU1100
 	else
 		memcpy(&au1xxx_eth1_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#endif
+}
+
+/* Register ctype's MACs with heap copies of the __initdata resource tables. */
+static void __init alchemy_setup_macs(int ctype)
+{
+	int ret, i;
+	unsigned char ethaddr[6];
+	struct resource *macres;
+
+	/* Handle 1st MAC */
+	if (alchemy_get_macs(ctype) < 1)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC0 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth0_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth0_device.resource = macres;
+
+	i = prom_get_ethernet_addr(ethaddr);
+	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
+		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+
+	ret = platform_device_register(&au1xxx_eth0_device);
+	if (ret)	/* non-zero return == failure, as for MAC1 below */
+		printk(KERN_INFO "Alchemy: failed to register MAC0\n");
+
+	/* Handle 2nd MAC */
+	if (alchemy_get_macs(ctype) < 2)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC1 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth1_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth1_device.resource = macres;
+
+	ethaddr[5] += 1;	/* next addr for 2nd MAC */
+	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
+		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
+
+	/* Register second MAC if enabled in pinfunc */
+	if (!(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2)) {
+		ret = platform_device_register(&au1xxx_eth1_device);
+		if (ret)
+			printk(KERN_INFO "Alchemy: failed to register MAC1\n");
+	}
 }
 
 static struct platform_device *au1xxx_platform_devices[] __initdata = {
-	&au1xx0_uart_device,
 	&au1xxx_usb_ohci_device,
 #ifdef CONFIG_FB_AU1100
 	&au1100_lcd_device,
@@ -460,36 +539,17 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 #ifdef SMBUS_PSC_BASE
 	&pbdb_smbus_device,
 #endif
-	&au1xxx_eth0_device,
 };
 
 static int __init au1xxx_platform_init(void)
 {
-	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
-	int err, i;
-	unsigned char ethaddr[6];
+	int err, ctype = alchemy_get_cputype();
 
-	/* Fill up uartclk. */
-	for (i = 0; au1x00_uart_data[i].flags; i++)
-		au1x00_uart_data[i].uartclk = uartclk;
-
-	/* use firmware-provided mac addr if available and necessary */
-	i = prom_get_ethernet_addr(ethaddr);
-	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
-		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+	alchemy_setup_uarts(ctype);
+	alchemy_setup_macs(ctype);
 
 	err = platform_add_devices(au1xxx_platform_devices,
 				   ARRAY_SIZE(au1xxx_platform_devices));
-#ifndef CONFIG_SOC_AU1100
-	ethaddr[5] += 1;	/* next addr for 2nd MAC */
-	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
-		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
-
-	/* Register second MAC if enabled in pinfunc */
-	if (!err && !(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2))
-		err = platform_device_register(&au1xxx_eth1_device);
-#endif
-
 	return err;
 }
 
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 561e5da2658b..1b887c868417 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -52,8 +52,6 @@ void __init plat_mem_setup(void)
 	/* this is faster than wasting cycles trying to approximate it */
 	preset_lpj = (est_freq >> 1) / HZ;
 
-	board_setup();  /* board specific setup */
-
 	if (au1xxx_cpu_needs_config_od())
 		/* Various early Au1xx0 errata corrected by this */
 		set_c0_config(1 << 19); /* Set Config[OD] */
@@ -61,6 +59,8 @@ void __init plat_mem_setup(void)
 		/* Clear to obtain best system bus performance */
 		clear_c0_config(1 << 19); /* Clear Config[OD] */
 
+	board_setup();  /* board specific setup */
+
 	/* IO/MEM resources. */
 	set_io_port_base(0);
 	ioport_resource.start = IOPORT_RESOURCE_START;
diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c
index 2aecb2fdf982..d5da6adbf634 100644
--- a/arch/mips/alchemy/common/time.c
+++ b/arch/mips/alchemy/common/time.c
@@ -141,8 +141,7 @@ static int __init alchemy_time_init(unsigned int m2int)
 		goto cntr_err;
 
 	/* register counter1 clocksource and event device */
-	clocksource_set_clock(&au1x_counter1_clocksource, 32768);
-	clocksource_register(&au1x_counter1_clocksource);
+	clocksource_register_hz(&au1x_counter1_clocksource, 32768);
 
 	cd->shift = 32;
 	cd->mult = div_sc(32768, NSEC_PER_SEC, cd->shift);
diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c
index 4a8980027ecf..1dac4f27d334 100644
--- a/arch/mips/alchemy/devboards/db1200/setup.c
+++ b/arch/mips/alchemy/devboards/db1200/setup.c
@@ -23,6 +23,13 @@ void __init board_setup(void)
 	unsigned long freq0, clksrc, div, pfc;
 	unsigned short whoami;
 
+	/* Set Config[OD] (disable overlapping bus transaction):
+	 * This gets rid of a _lot_ of spurious interrupts (especially
+	 * wrt. IDE); but incurs ~10% performance hit in some
+	 * cpu-bound applications.
+	 */
+	set_c0_config(1 << 19);
+
 	bcsr_init(DB1200_BCSR_PHYS_ADDR,
 		  DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS);
 
diff --git a/arch/mips/alchemy/devboards/db1x00/board_setup.c b/arch/mips/alchemy/devboards/db1x00/board_setup.c
index 05f120ff90f9..5c956fe8760f 100644
--- a/arch/mips/alchemy/devboards/db1x00/board_setup.c
+++ b/arch/mips/alchemy/devboards/db1x00/board_setup.c
@@ -127,13 +127,10 @@ const char *get_system_type(void)
 void __init board_setup(void)
 {
 	unsigned long bcsr1, bcsr2;
-	u32 pin_func;
 
 	bcsr1 = DB1000_BCSR_PHYS_ADDR;
 	bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS;
 
-	pin_func = 0;
-
 #ifdef CONFIG_MIPS_DB1000
 	printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n");
 #endif
@@ -164,12 +161,16 @@ void __init board_setup(void)
 	/* Not valid for Au1550 */
 #if defined(CONFIG_IRDA) && \
    (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100))
-	/* Set IRFIRSEL instead of GPIO15 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
-	au_writel(pin_func, SYS_PINFUNC);
-	/* Power off until the driver is in use */
-	bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
-				BCSR_RESETS_IRDA_MODE_OFF);
+	{
+		u32 pin_func;
+
+		/* Set IRFIRSEL instead of GPIO15 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
+		au_writel(pin_func, SYS_PINFUNC);
+		/* Power off until the driver is in use */
+		bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
+			 BCSR_RESETS_IRDA_MODE_OFF);
+	}
 #endif
 	bcsr_write(BCSR_PCMCIA, 0);	/* turn off PCMCIA power */
 
@@ -177,31 +178,35 @@ void __init board_setup(void)
 	alchemy_gpio1_input_enable();
 
 #ifdef CONFIG_MIPS_MIRAGE
-	/* GPIO[20] is output */
-	alchemy_gpio_direction_output(20, 0);
+	{
+		u32 pin_func;
 
-	/* Set GPIO[210:208] instead of SSI_0 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
+		/* GPIO[20] is output */
+		alchemy_gpio_direction_output(20, 0);
 
-	/* Set GPIO[215:211] for LEDs */
-	pin_func |= 5 << 2;
+		/* Set GPIO[210:208] instead of SSI_0 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
 
-	/* Set GPIO[214:213] for more LEDs */
-	pin_func |= 5 << 12;
+		/* Set GPIO[215:211] for LEDs */
+		pin_func |= 5 << 2;
 
-	/* Set GPIO[207:200] instead of PCMCIA/LCD */
-	pin_func |= SYS_PF_LCD | SYS_PF_PC;
-	au_writel(pin_func, SYS_PINFUNC);
+		/* Set GPIO[214:213] for more LEDs */
+		pin_func |= 5 << 12;
 
-	/*
-	 * Enable speaker amplifier.  This should
-	 * be part of the audio driver.
-	 */
-	alchemy_gpio_direction_output(209, 1);
+		/* Set GPIO[207:200] instead of PCMCIA/LCD */
+		pin_func |= SYS_PF_LCD | SYS_PF_PC;
+		au_writel(pin_func, SYS_PINFUNC);
 
-	pm_power_off = mirage_power_off;
-	_machine_halt = mirage_power_off;
-	_machine_restart = (void(*)(char *))mips_softreset;
+		/*
+		 * Enable speaker amplifier.  This should
+		 * be part of the audio driver.
+		 */
+		alchemy_gpio_direction_output(209, 1);
+
+		pm_power_off = mirage_power_off;
+		_machine_halt = mirage_power_off;
+		_machine_restart = (void(*)(char *))mips_softreset;
+	}
 #endif
 
 #ifdef CONFIG_MIPS_BOSPORUS
diff --git a/arch/mips/alchemy/devboards/pb1000/board_setup.c b/arch/mips/alchemy/devboards/pb1000/board_setup.c
index 2d85c4b5be09..e64fdcbf75d0 100644
--- a/arch/mips/alchemy/devboards/pb1000/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1000/board_setup.c
@@ -65,7 +65,7 @@ void __init board_setup(void)
 
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 #if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE)
diff --git a/arch/mips/alchemy/devboards/pb1500/board_setup.c b/arch/mips/alchemy/devboards/pb1500/board_setup.c
index 83f46215eb0c..3b4fa3206969 100644
--- a/arch/mips/alchemy/devboards/pb1500/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1500/board_setup.c
@@ -56,7 +56,7 @@ void __init board_setup(void)
 	sys_clksrc = sys_freqctrl = pin_func = 0;
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 	/* GPIO201 is input for PCMCIA card detect */
diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c
index baeb21385058..e5306b56da6d 100644
--- a/arch/mips/alchemy/devboards/prom.c
+++ b/arch/mips/alchemy/devboards/prom.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-    alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/gpr/board_setup.c b/arch/mips/alchemy/gpr/board_setup.c
index ad2e3f137933..5f8f0691ed2d 100644
--- a/arch/mips/alchemy/gpr/board_setup.c
+++ b/arch/mips/alchemy/gpr/board_setup.c
@@ -36,9 +36,6 @@
 
 #include <prom.h>
 
-#define UART1_ADDR	KSEG1ADDR(UART1_PHYS_ADDR)
-#define UART3_ADDR	KSEG1ADDR(UART3_PHYS_ADDR)
-
 char irq_tab_alchemy[][5] __initdata = {
 	[0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff },
 };
@@ -67,18 +64,15 @@ static void gpr_power_off(void)
 
 void __init board_setup(void)
 {
-	printk(KERN_INFO "Tarpeze ITS GPR board\n");
+	printk(KERN_INFO "Trapeze ITS GPR board\n");
 
 	pm_power_off = gpr_power_off;
 	_machine_halt = gpr_power_off;
 	_machine_restart = gpr_reset;
 
-	/* Enable UART3 */
-	au_writel(0x1, UART3_ADDR + UART_MOD_CNTRL);/* clock enable (CE) */
-	au_writel(0x3, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	/* Enable UART1 */
-	au_writel(0x1, UART1_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	au_writel(0x3, UART1_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
+	/* Enable UART1/3 */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	alchemy_uart_enable(AU1000_UART1_PHYS_ADDR);
 
 	/* Take away Reset of UMTS-card */
 	alchemy_gpio_direction_output(215, 1);
diff --git a/arch/mips/alchemy/gpr/init.c b/arch/mips/alchemy/gpr/init.c
index f044f4c541d7..229aafae680c 100644
--- a/arch/mips/alchemy/gpr/init.c
+++ b/arch/mips/alchemy/gpr/init.c
@@ -59,5 +59,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index cf436ab679ae..3ae984cf98cf 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -87,7 +87,7 @@ void __init board_setup(void)
 	au_writel(SYS_PF_NI2, SYS_PINFUNC);
 
 	/* Initialize GPIO */
-	au_writel(0xFFFFFFFF, SYS_TRIOUTCLR);
+	au_writel(~0, KSEG1ADDR(AU1000_SYS_PHYS_ADDR) + SYS_TRIOUTCLR);
 	alchemy_gpio_direction_output(0, 0);	/* Disable M66EN (PCI 66MHz) */
 	alchemy_gpio_direction_output(3, 1);	/* Disable PCI CLKRUN# */
 	alchemy_gpio_direction_output(1, 1);	/* Enable EXT_IO3 */
diff --git a/arch/mips/alchemy/mtx-1/init.c b/arch/mips/alchemy/mtx-1/init.c
index f8d25575fa05..2e81cc7f3422 100644
--- a/arch/mips/alchemy/mtx-1/init.c
+++ b/arch/mips/alchemy/mtx-1/init.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index 956f946218c5..55628e390fd7 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -53,8 +53,8 @@ static struct platform_device mtx1_button = {
 
 static struct resource mtx1_wdt_res[] = {
 	[0] = {
-		.start	= 15,
-		.end	= 15,
+		.start	= 215,
+		.end	= 215,
 		.name	= "mtx1-wdt-gpio",
 		.flags	= IORESOURCE_IRQ,
 	}
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index febfb0fb0896..81e57fad07ab 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -66,13 +66,10 @@ void __init board_setup(void)
 	au_writel(pin_func, SYS_PINFUNC);
 
 	/* Enable UART */
-	au_writel(0x01, UART3_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	mdelay(10);
-	au_writel(0x03, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	mdelay(10);
-
-	/* Enable DTR = USB power up */
-	au_writel(0x01, UART3_ADDR + UART_MCR); /* UART_MCR_DTR is 0x01??? */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	/* Enable DTR (MCR bit 0) = USB power up */
+	__raw_writel(1, (void __iomem *)KSEG1ADDR(AU1000_UART3_PHYS_ADDR + 0x18));
+	wmb();
 
 #ifdef CONFIG_PCI
 #if defined(__MIPSEB__)
diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c
index 15125c2fda7d..0ee02cfa989d 100644
--- a/arch/mips/alchemy/xxs1500/init.c
+++ b/arch/mips/alchemy/xxs1500/init.c
@@ -51,14 +51,13 @@ void __init prom_init(void)
 	prom_init_cmdline();
 
 	memsize_str = prom_getenv("memsize");
-	if (!memsize_str)
+	if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize))
 		memsize = 0x04000000;
-	else
-		strict_strtoul(memsize_str, 0, &memsize);
+
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 }
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 425dfa5d6e12..bb571bcdb8f2 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
 		size = 0x1f;
 	}
 
-	gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
-					AR7_REGS_GPIO + 0x10);
-
+	gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
 	if (!gpch->regs) {
 		printk(KERN_ERR "%s: failed to ioremap regs\n",
 					gpch->chip.label);
diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index e5b6615731e5..54db815bc86c 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2005 Broadcom Corporation
  * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -23,7 +24,7 @@
 static char nvram_buf[NVRAM_SPACE];
 
 /* Probe for NVRAM header */
-static void __init early_nvram_init(void)
+static void early_nvram_init(void)
 {
 	struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore;
 	struct nvram_header *header;
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index c95f90bf734c..73b529b57433 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -3,6 +3,7 @@
  *  Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
  *  Copyright (C) 2006 Michael Buesch <mb@bu3sch.de>
  *  Copyright (C) 2010 Waldemar Brodkorb <wbx@openadk.org>
+ *  Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the
@@ -57,10 +58,49 @@ static void bcm47xx_machine_halt(void)
 }
 
 #define READ_FROM_NVRAM(_outvar, name, buf) \
-	if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\
+	if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\
 		sprom->_outvar = simple_strtoul(buf, NULL, 0);
 
-static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
+#define READ_FROM_NVRAM2(_outvar, name1, name2, buf) \
+	if (nvram_getprefix(prefix, name1, buf, sizeof(buf)) >= 0 || \
+	    nvram_getprefix(prefix, name2, buf, sizeof(buf)) >= 0)\
+		sprom->_outvar = simple_strtoul(buf, NULL, 0);
+
+/* nvram_getenv() on "<prefix><name>", or on plain name when prefix is NULL. */
+static inline int nvram_getprefix(const char *prefix, char *name,
+				  char *buf, int len)
+{
+	if (prefix) {
+		char key[100];
+
+		snprintf(key, sizeof(key), "%s%s", prefix, name);
+		return nvram_getenv(key, buf, len);
+	}
+	return nvram_getenv(name, buf, len);
+}
+
+/* Combine the 16-bit NVRAM values "<name>0" (low) and "<name>1" (high)
+ * into one u32; yields 0 when either of them is missing. */
+static u32 nvram_getu32(const char *name, char *buf, int len)
+{
+	char key[100];
+	u16 var0, var1;
+
+	snprintf(key, sizeof(key), "%s0", name);
+	/* return 0 here so this looks like unset */
+	if (nvram_getenv(key, buf, len) < 0)
+		return 0;
+	var0 = simple_strtoul(buf, NULL, 0);
+
+	snprintf(key, sizeof(key), "%s1", name);
+	if (nvram_getenv(key, buf, len) < 0)
+		return 0;
+	var1 = simple_strtoul(buf, NULL, 0);
+	/* widen first: a u16 promotes to int, so bit 15 would hit the sign bit */
+	return (u32)var1 << 16 | var0;
+}
+
+static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 {
 	char buf[100];
 	u32 boardflags;
@@ -69,11 +109,12 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 
 	sprom->revision = 1; /* Fallback: Old hardware does not define this. */
 	READ_FROM_NVRAM(revision, "sromrev", buf);
-	if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0 ||
+	    nvram_getprefix(prefix, "macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->il0mac);
-	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et0macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et0mac);
-	if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et1macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et1mac);
 	READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf);
 	READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf);
@@ -95,20 +136,36 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf);
 	READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf);
 	READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf);
-	READ_FROM_NVRAM(gpio0, "wl0gpio0", buf);
-	READ_FROM_NVRAM(gpio1, "wl0gpio1", buf);
-	READ_FROM_NVRAM(gpio2, "wl0gpio2", buf);
-	READ_FROM_NVRAM(gpio3, "wl0gpio3", buf);
-	READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf);
-	READ_FROM_NVRAM(itssi_a, "pa1itssit", buf);
-	READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf);
+	READ_FROM_NVRAM2(gpio0, "ledbh0", "wl0gpio0", buf);
+	READ_FROM_NVRAM2(gpio1, "ledbh1", "wl0gpio1", buf);
+	READ_FROM_NVRAM2(gpio2, "ledbh2", "wl0gpio2", buf);
+	READ_FROM_NVRAM2(gpio3, "ledbh3", "wl0gpio3", buf);
+	READ_FROM_NVRAM2(maxpwr_bg, "maxp2ga0", "pa0maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_al, "maxp5gla0", "pa1lomaxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_a, "maxp5ga0", "pa1maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_ah, "maxp5gha0", "pa1himaxpwr", buf);
+	READ_FROM_NVRAM2(itssi_bg, "itt2ga0", "pa0itssit", buf);
+	READ_FROM_NVRAM2(itssi_a, "itt5ga0", "pa1itssit", buf);
 	READ_FROM_NVRAM(tri2g, "tri2g", buf);
 	READ_FROM_NVRAM(tri5gl, "tri5gl", buf);
 	READ_FROM_NVRAM(tri5g, "tri5g", buf);
 	READ_FROM_NVRAM(tri5gh, "tri5gh", buf);
+	READ_FROM_NVRAM(txpid2g[0], "txpid2ga0", buf);
+	READ_FROM_NVRAM(txpid2g[1], "txpid2ga1", buf);
+	READ_FROM_NVRAM(txpid2g[2], "txpid2ga2", buf);
+	READ_FROM_NVRAM(txpid2g[3], "txpid2ga3", buf);
+	READ_FROM_NVRAM(txpid5g[0], "txpid5ga0", buf);
+	READ_FROM_NVRAM(txpid5g[1], "txpid5ga1", buf);
+	READ_FROM_NVRAM(txpid5g[2], "txpid5ga2", buf);
+	READ_FROM_NVRAM(txpid5g[3], "txpid5ga3", buf);
+	READ_FROM_NVRAM(txpid5gl[0], "txpid5gla0", buf);
+	READ_FROM_NVRAM(txpid5gl[1], "txpid5gla1", buf);
+	READ_FROM_NVRAM(txpid5gl[2], "txpid5gla2", buf);
+	READ_FROM_NVRAM(txpid5gl[3], "txpid5gla3", buf);
+	READ_FROM_NVRAM(txpid5gh[0], "txpid5gha0", buf);
+	READ_FROM_NVRAM(txpid5gh[1], "txpid5gha1", buf);
+	READ_FROM_NVRAM(txpid5gh[2], "txpid5gha2", buf);
+	READ_FROM_NVRAM(txpid5gh[3], "txpid5gha3", buf);
 	READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf);
 	READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf);
 	READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf);
@@ -120,19 +177,27 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf);
 	READ_FROM_NVRAM(bxa5g, "bxa5g", buf);
 	READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf);
-	READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf);
-	READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf);
-	READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf);
-	READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf);
 
-	if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) {
+	sprom->ofdm2gpo = nvram_getu32("ofdm2gpo", buf, sizeof(buf));
+	sprom->ofdm5glpo = nvram_getu32("ofdm5glpo", buf, sizeof(buf));
+	sprom->ofdm5gpo = nvram_getu32("ofdm5gpo", buf, sizeof(buf));
+	sprom->ofdm5ghpo = nvram_getu32("ofdm5ghpo", buf, sizeof(buf));
+
+	READ_FROM_NVRAM(antenna_gain.ghz24.a0, "ag0", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a1, "ag1", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a2, "ag2", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a3, "ag3", buf);
+	memcpy(&sprom->antenna_gain.ghz5, &sprom->antenna_gain.ghz24,
+	       sizeof(sprom->antenna_gain.ghz5));
+
+	if (nvram_getprefix(prefix, "boardflags", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags_lo = (boardflags & 0x0000FFFFU);
 			sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16;
 		}
 	}
-	if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) {
+	if (nvram_getprefix(prefix, "boardflags2", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags2_lo = (boardflags & 0x0000FFFFU);
@@ -141,6 +206,22 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	}
 }
 
+/* SSB fallback SPROM callback: reads the "pci/<bus + 1>/<slot>/" NVRAM keys. */
+int bcm47xx_get_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	char prefix[16];	/* room for e.g. "pci/256/31/" untruncated */
+
+	if (bus->bustype != SSB_BUSTYPE_PCI) {
+		printk(KERN_WARNING "bcm47xx: unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+	snprintf(prefix, sizeof(prefix), "pci/%u/%u/",
+		 bus->host_pci->bus->number + 1,
+		 PCI_SLOT(bus->host_pci->devfn));
+	bcm47xx_fill_sprom(out, prefix);
+	return 0;
+}
+
 static int bcm47xx_get_invariants(struct ssb_bus *bus,
 				   struct ssb_init_invariants *iv)
 {
@@ -158,7 +239,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus,
 	if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0)
 		iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0);
 
-	bcm47xx_fill_sprom(&iv->sprom);
+	bcm47xx_fill_sprom(&iv->sprom, NULL);
 
 	if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0)
 		iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10);
@@ -172,6 +253,11 @@ void __init plat_mem_setup(void)
 	char buf[100];
 	struct ssb_mipscore *mcore;
 
+	err = ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom);
+	if (err)
+		printk(KERN_WARNING "bcm47xx: someone else already registered"
+			" a ssb SPROM callback handler (err %d)\n", err);
+
 	err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 8dba8cfb752f..40b223b603be 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = {
 	.boardflags_lo		= 0x2848,
 	.boardflags_hi		= 0x0000,
 };
+
+/* Copy the static bcm63xx_sprom into *out for PCI buses; -EINVAL otherwise. */
+int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	if (bus->bustype != SSB_BUSTYPE_PCI) {
+		printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+	memcpy(out, &bcm63xx_sprom, sizeof(*out));
+	return 0;
+}
 #endif
 
 /*
@@ -793,8 +804,9 @@ void __init board_prom_init(void)
 	if (!board_get_mac_address(bcm63xx_sprom.il0mac)) {
 		memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN);
 		memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN);
-		if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0)
-			printk(KERN_ERR "failed to register fallback SPROM\n");
+		if (ssb_arch_register_fallback_sprom(
+				&bcm63xx_get_fallback_sprom) < 0)
+			printk(KERN_ERR PFX "failed to register fallback SPROM\n");
 	}
 #endif
 }
diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
index 88c9d963be88..9a6243676e22 100644
--- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
+++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
@@ -16,8 +16,8 @@
 
 int main(int argc, char *argv[])
 {
+	unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 	struct stat sb;
-	uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 
 	if (argc != 3) {
 		fprintf(stderr, "Usage: %s <pathname> <vmlinux_load_addr>\n",
diff --git a/arch/mips/boot/compressed/uart-alchemy.c b/arch/mips/boot/compressed/uart-alchemy.c
index 1bff22fa089b..eb063e6dead9 100644
--- a/arch/mips/boot/compressed/uart-alchemy.c
+++ b/arch/mips/boot/compressed/uart-alchemy.c
@@ -3,5 +3,5 @@
 void putc(char c)
 {
 	/* all current (Jan. 2010) in-kernel boards */
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index caae22858163..cad555ebeca3 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -1,11 +1,7 @@
-config CAVIUM_OCTEON_SPECIFIC_OPTIONS
-	bool "Enable Octeon specific options"
-	depends on CPU_CAVIUM_OCTEON
-	default "y"
+if CPU_CAVIUM_OCTEON
 
 config CAVIUM_CN63XXP1
 	bool "Enable CN63XXP1 errata worarounds"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  The CN63XXP1 chip requires build time workarounds to
@@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1
 
 config CAVIUM_OCTEON_2ND_KERNEL
 	bool "Build the kernel to be used as a 2nd kernel on the same chip"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  This option configures this kernel to be linked at a different
@@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL
 
 config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 	bool "Enable hardware fixups of unaligned loads and stores"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Configure the Octeon hardware to automatically fix unaligned loads
@@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 
 config CAVIUM_OCTEON_CVMSEG_SIZE
 	int "Number of L1 cache lines reserved for CVMSEG memory"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	range 0 54
 	default 1
 	help
@@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE
 
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Enable locking parts of the kernel into the L2 cache.
@@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC
-	depends on CPU_CAVIUM_OCTEON
 
 config CAVIUM_OCTEON_HELPER
 	def_bool y
@@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH
 
 config SWIOTLB
 	def_bool y
-	depends on CPU_CAVIUM_OCTEON
 	select IOMMU_HELPER
 	select NEED_SG_DMA_LENGTH
+
+
+endif # CPU_CAVIUM_OCTEON
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 26bf71130bf8..29d56afbb02d 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -105,8 +105,7 @@ unsigned long long notrace sched_clock(void)
 void __init plat_time_init(void)
 {
 	clocksource_mips.rating = 300;
-	clocksource_set_clock(&clocksource_mips, octeon_get_clock_rate());
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, octeon_get_clock_rate());
 }
 
 static u64 octeon_udelay_factor;
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 0707fae3f0ee..2d9028f1474c 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -288,7 +288,6 @@ void octeon_user_io_init(void)
 	union octeon_cvmemctl cvmmemctl;
 	union cvmx_iob_fau_timeout fau_timeout;
 	union cvmx_pow_nw_tim nm_tim;
-	uint64_t cvmctl;
 
 	/* Get the current settings for CP0_CVMMEMCTL_REG */
 	cvmmemctl.u64 = read_c0_cvmmemctl();
@@ -392,12 +391,6 @@ void octeon_user_io_init(void)
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
 
-	/* Move the performance counter interrupts to IRQ 6 */
-	cvmctl = read_c0_cvmctl();
-	cvmctl &= ~(7 << 7);
-	cvmctl |= 6 << 7;
-	write_c0_cvmctl(cvmctl);
-
 	/* Set a default for the hardware timeouts */
 	fau_timeout.u64 = 0;
 	fau_timeout.s.tout_val = 0xfff;
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index ba78b21cc8d0..8b606423bbd7 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -37,13 +37,15 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 	uint64_t action;
 
 	/* Load the mailbox register to figure out what we're supposed to do */
-	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid));
+	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff;
 
 	/* Clear the mailbox to clear the interrupt */
 	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	/* Check if we've been told to flush the icache */
 	if (action & SMP_ICACHE_FLUSH)
@@ -200,16 +202,15 @@ void octeon_prepare_cpus(unsigned int max_cpus)
 	if (labi->labi_signature != LABI_SIGNATURE)
 		panic("The bootloader version on this board is incorrect.");
 #endif
-
-	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffffffff);
+	/*
+	 * Only the low order mailbox bits are used for IPIs, leave
+	 * the other bits alone.
+	 */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff);
 	if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox0", mailbox_interrupt)) {
+			"SMP-IPI", mailbox_interrupt)) {
 		panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
 	}
-	if (request_irq(OCTEON_IRQ_MBOX1, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox1", mailbox_interrupt)) {
-		panic("Cannot request_irq(OCTEON_IRQ_MBOX1)\n");
-	}
 }
 
 /**
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 167c1d07b809..b6acd2f256b6 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -86,8 +86,8 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
@@ -329,7 +329,7 @@ CONFIG_USB_LED=m
 CONFIG_USB_GADGET=m
 CONFIG_USB_GADGET_M66592=y
 CONFIG_MMC=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_STAGING=y
 # CONFIG_STAGING_EXCLUDE_BUILD is not set
 CONFIG_FB_SM7XX=y
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 7270f3183bda..5527abbb7dea 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -374,7 +374,7 @@ CONFIG_FB_CIRRUS=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_HID=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_IDE_DISK=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index a97a42c6b2c8..37862b2ce363 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -225,8 +225,8 @@ CONFIG_TOSHIBA_FIR=m
 CONFIG_VLSI_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
new file mode 100644
index 000000000000..e4b399fdaa61
--- /dev/null
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -0,0 +1,574 @@
+CONFIG_NLM_XLR_BOARD=y
+CONFIG_HIGHMEM=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_SMP=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_KEXEC=y
+CONFIG_EXPERIMENTAL=y
+CONFIG_CROSS_COMPILE="mips64-unknown-linux-gnu-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs"
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_INITRAMFS_COMPRESSION_GZIP=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_PCSPKR_PLATFORM is not set
+# CONFIG_PERF_EVENTS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_DECNET_NF_GRABULATOR=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_DCCP=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_TIPC=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_DECNET=m
+CONFIG_LLC2=m
+CONFIG_IPX=m
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+CONFIG_ECONET=m
+CONFIG_ECONET_AUNUDP=y
+CONFIG_ECONET_NATIVE=y
+CONFIG_WAN_ROUTER=m
+CONFIG_PHONET=m
+CONFIG_IEEE802154=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_DCB=y
+CONFIG_NET_PKTGEN=m
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_CDROM_PKTCDVD=y
+CONFIG_MISC_DEVICES=y
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_TGT_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_RAW=m
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_N_HDLC=m
+# CONFIG_DEVKMEM is not set
+CONFIG_STALDRV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_RAW_DRIVER=m
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_UIO=y
+CONFIG_UIO_PDRV=m
+CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_FSCACHE_HISTOGRAM=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_ADFS_FS=m
+CONFIG_AFFS_FS=m
+CONFIG_ECRYPT_FS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_VXFS_FS=m
+CONFIG_MINIX_FS=m
+CONFIG_OMFS_FS=m
+CONFIG_HPFS_FS=m
+CONFIG_QNX4FS_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EXOFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_EXPERIMENTAL=y
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_CODA_FS=m
+CONFIG_AFS_FS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ACORN_PARTITION=y
+CONFIG_ACORN_PARTITION_ICS=y
+CONFIG_ACORN_PARTITION_RISCIX=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+CONFIG_SGI_PARTITION=y
+CONFIG_ULTRIX_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_SYSV68_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="cp437"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KGDB=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=0
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_SMACK=y
+CONFIG_SECURITY_TOMOYO=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC7=m
diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index 650ac9ba734c..b4db69fbc40c 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -17,6 +17,6 @@
 #define SMP_CACHE_SHIFT		L1_CACHE_SHIFT
 #define SMP_CACHE_BYTES		L1_CACHE_BYTES
 
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h
index fa4328f9124f..65f9bdd02f1f 100644
--- a/arch/mips/include/asm/cevt-r4k.h
+++ b/arch/mips/include/asm/cevt-r4k.h
@@ -14,6 +14,9 @@
 #ifndef __ASM_CEVT_R4K_H
 #define __ASM_CEVT_R4K_H
 
+#include <linux/clockchips.h>
+#include <asm/time.h>
+
 DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void mips_event_handler(struct clock_event_device *dev);
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 86877539c6e8..34c0d3cb116f 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -33,6 +33,7 @@
 #define PRID_COMP_TOSHIBA	0x070000
 #define PRID_COMP_LSI		0x080000
 #define PRID_COMP_LEXRA		0x0b0000
+#define PRID_COMP_NETLOGIC	0x0c0000
 #define PRID_COMP_CAVIUM	0x0d0000
 #define PRID_COMP_INGENIC	0xd00000
 
@@ -142,6 +143,31 @@
 #define PRID_IMP_JZRISC        0x0200
 
 /*
+ * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC
+ */
+#define PRID_IMP_NETLOGIC_XLR732	0x0000
+#define PRID_IMP_NETLOGIC_XLR716	0x0200
+#define PRID_IMP_NETLOGIC_XLR532	0x0900
+#define PRID_IMP_NETLOGIC_XLR308	0x0600
+#define PRID_IMP_NETLOGIC_XLR532C	0x0800
+#define PRID_IMP_NETLOGIC_XLR516C	0x0a00
+#define PRID_IMP_NETLOGIC_XLR508C	0x0b00
+#define PRID_IMP_NETLOGIC_XLR308C	0x0f00
+#define PRID_IMP_NETLOGIC_XLS608	0x8000
+#define PRID_IMP_NETLOGIC_XLS408	0x8800
+#define PRID_IMP_NETLOGIC_XLS404	0x8c00
+#define PRID_IMP_NETLOGIC_XLS208	0x8e00
+#define PRID_IMP_NETLOGIC_XLS204	0x8f00
+#define PRID_IMP_NETLOGIC_XLS108	0xce00
+#define PRID_IMP_NETLOGIC_XLS104	0xcf00
+#define PRID_IMP_NETLOGIC_XLS616B	0x4000
+#define PRID_IMP_NETLOGIC_XLS608B	0x4a00
+#define PRID_IMP_NETLOGIC_XLS416B	0x4400
+#define PRID_IMP_NETLOGIC_XLS412B	0x4c00
+#define PRID_IMP_NETLOGIC_XLS408B	0x4e00
+#define PRID_IMP_NETLOGIC_XLS404B	0x4f00
+
+/*
  * Definitions for 7:0 on legacy processors
  */
 
@@ -234,6 +260,7 @@ enum cpu_type_enum {
 	 */
 	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
+	CPU_XLR,
 
 	CPU_LAST
 };
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 655f849bd08d..7aa37ddfca4b 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -5,7 +5,9 @@
 #include <asm/cache.h>
 #include <asm-generic/dma-coherent.h>
 
+#ifndef CONFIG_SGI_IP27	/* Kludge to fix 2.6.39 build for IP27 */
 #include <dma-coherence.h>
+#endif
 
 extern struct dma_map_ops *mips_dma_map_ops;
 
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index f5e856015329..c565b7c3f0b5 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
+	flush_tlb_mm(vma->vm_mm);
 }
 
 static inline int huge_pte_none(pte_t pte)
diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h
index 48bb82372994..9ad011366f73 100644
--- a/arch/mips/include/asm/i8253.h
+++ b/arch/mips/include/asm/i8253.h
@@ -12,8 +12,13 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH		LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern void setup_pit_timer(void);
 
+#define inb_pit inb_p
+#define outb_pit outb_p
+
 #endif /* __ASM_I8253_H */
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 7622ccf75076..1881b316ca45 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,16 +20,18 @@
 #define WORD_INSN ".word"
 #endif
 
-#define JUMP_LABEL(key, label)						\
-	do {								\
-		asm goto("1:\tnop\n\t"					\
-			"nop\n\t"					\
-			".pushsection __jump_table,  \"a\"\n\t"		\
-			WORD_INSN " 1b, %l[" #label "], %0\n\t"		\
-			".popsection\n\t"				\
-			: :  "i" (key) :  : label);			\
-	} while (0)
-
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:\tnop\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index a6976619160a..f260ebed713b 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -161,6 +161,45 @@ static inline int alchemy_get_cputype(void)
 	return ALCHEMY_CPU_UNKNOWN;
 }
 
+/* return number of UARTs on a given cputype (0 for any unlisted type) */
+static inline int alchemy_get_uarts(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+		return 4;
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1200:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+	case ALCHEMY_CPU_AU1550:
+		return 3;
+	}
+	return 0;
+}
+
+/* enable a UART block if it isn't already */
+static inline void alchemy_uart_enable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+
+	/* reset, enable clock, deassert reset */
+	if ((__raw_readl(addr + 0x100) & 3) != 3) {
+		__raw_writel(0, addr + 0x100);
+		wmb();
+		__raw_writel(1, addr + 0x100);
+		wmb();
+	}
+	__raw_writel(3, addr + 0x100);
+	wmb();
+}
+
+static inline void alchemy_uart_disable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+	__raw_writel(0, addr + 0x100);	/* UART_MOD_CNTRL */
+	wmb();
+}
+
 static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 {
 	void __iomem *base = (void __iomem *)KSEG1ADDR(uart_phys);
@@ -180,6 +219,20 @@ static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 	wmb();
 }
 
+/* return number of ethernet MACs on a given cputype (0 for any other type) */
+static inline int alchemy_get_macs(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1550:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+		return 1;
+	}
+	return 0;
+}
+
 /* arch/mips/au1000/common/clocks.c */
 extern void set_au1x00_speed(unsigned int new_freq);
 extern unsigned int get_au1x00_speed(void);
@@ -630,38 +683,42 @@ enum soc_au1200_ints {
 
 /*
  * Physical base addresses for integrated peripherals
+ * (digits after each address: 0=au1000 1=au1500 2=au1100 3=au1550 4=au1200)
  */
 
+#define AU1000_AC97_PHYS_ADDR		0x10000000 /* 012 */
+#define AU1000_USBD_PHYS_ADDR		0x10200000 /* 0123 */
+#define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
+#define AU1000_MAC0_PHYS_ADDR		0x10500000 /* 023 */
+#define AU1000_MAC1_PHYS_ADDR		0x10510000 /* 023 */
+#define AU1000_MACEN_PHYS_ADDR		0x10520000 /* 023 */
+#define AU1100_SD0_PHYS_ADDR		0x10600000 /* 24 */
+#define AU1100_SD1_PHYS_ADDR		0x10680000 /* 24 */
+#define AU1000_I2S_PHYS_ADDR		0x11000000 /* 02 */
+#define AU1500_MAC0_PHYS_ADDR		0x11500000 /* 1 */
+#define AU1500_MAC1_PHYS_ADDR		0x11510000 /* 1 */
+#define AU1500_MACEN_PHYS_ADDR		0x11520000 /* 1 */
+#define AU1000_UART0_PHYS_ADDR		0x11100000 /* 01234 */
+#define AU1000_UART1_PHYS_ADDR		0x11200000 /* 0234 */
+#define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
+#define AU1000_UART3_PHYS_ADDR		0x11400000 /* 0123 */
+#define AU1500_GPIO2_PHYS_ADDR		0x11700000 /* 1234 */
+#define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+#define AU1000_SYS_PHYS_ADDR		0x11900000 /* 01234 */
+#define AU1000_DMA_PHYS_ADDR		0x14002000 /* 012 */
+#define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
+#define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
+#define AU1000_MACDMA0_PHYS_ADDR	0x14004000 /* 0123 */
+#define AU1000_MACDMA1_PHYS_ADDR	0x14004200 /* 0123 */
+
+
 #ifdef CONFIG_SOC_AU1000
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART2_PHYS_ADDR		0x11300000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
 #define PCMCIA_MEM_PHYS_ADDR	0xF80000000ULL
@@ -672,30 +729,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1500
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x11500000
-#define	MAC1_PHYS_ADDR		0x11510000
-#define	MACEN_PHYS_ADDR		0x11520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART3_PHYS_ADDR		0x11400000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCI_MEM_PHYS_ADDR	0x400000000ULL
 #define PCI_IO_PHYS_ADDR	0x500000000ULL
 #define PCI_CONFIG0_PHYS_ADDR	0x600000000ULL
@@ -710,34 +745,10 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1100
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define LCD_PHYS_ADDR		0x15000000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
@@ -749,22 +760,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1550
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	USBH_PHYS_ADDR		0x14020000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PE_PHYS_ADDR		0x14008000
 #define PSC0_PHYS_ADDR		0x11A00000
 #define PSC1_PHYS_ADDR		0x11B00000
@@ -786,19 +783,10 @@ enum soc_au1200_ints {
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define AES_PHYS_ADDR		0x10300000
 #define CIM_PHYS_ADDR		0x14004000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define USBM_PHYS_ADDR		0x14020000
 #define	USBH_PHYS_ADDR		0x14020100
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PSC0_PHYS_ADDR	 	0x11A00000
 #define PSC1_PHYS_ADDR	 	0x11B00000
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
 #define LCD_PHYS_ADDR		0x15000000
 #define SWCNT_PHYS_ADDR		0x1110010C
 #define MAEFE_PHYS_ADDR		0x14012000
@@ -835,183 +823,43 @@ enum soc_au1200_ints {
 #endif
 
 
-/* Interrupt Controller register offsets */
-#define IC_CFG0RD		0x40
-#define IC_CFG0SET		0x40
-#define IC_CFG0CLR		0x44
-#define IC_CFG1RD		0x48
-#define IC_CFG1SET		0x48
-#define IC_CFG1CLR		0x4C
-#define IC_CFG2RD		0x50
-#define IC_CFG2SET		0x50
-#define IC_CFG2CLR		0x54
-#define IC_REQ0INT		0x54
-#define IC_SRCRD		0x58
-#define IC_SRCSET		0x58
-#define IC_SRCCLR		0x5C
-#define IC_REQ1INT		0x5C
-#define IC_ASSIGNRD		0x60
-#define IC_ASSIGNSET		0x60
-#define IC_ASSIGNCLR		0x64
-#define IC_WAKERD		0x68
-#define IC_WAKESET		0x68
-#define IC_WAKECLR		0x6C
-#define IC_MASKRD		0x70
-#define IC_MASKSET		0x70
-#define IC_MASKCLR		0x74
-#define IC_RISINGRD		0x78
-#define IC_RISINGCLR		0x78
-#define IC_FALLINGRD		0x7C
-#define IC_FALLINGCLR		0x7C
-#define IC_TESTBIT		0x80
-
-
-/* Interrupt Controller 0 */
-#define IC0_CFG0RD		0xB0400040
-#define IC0_CFG0SET		0xB0400040
-#define IC0_CFG0CLR		0xB0400044
-
-#define IC0_CFG1RD		0xB0400048
-#define IC0_CFG1SET		0xB0400048
-#define IC0_CFG1CLR		0xB040004C
-
-#define IC0_CFG2RD		0xB0400050
-#define IC0_CFG2SET		0xB0400050
-#define IC0_CFG2CLR		0xB0400054
-
-#define IC0_REQ0INT		0xB0400054
-#define IC0_SRCRD		0xB0400058
-#define IC0_SRCSET		0xB0400058
-#define IC0_SRCCLR		0xB040005C
-#define IC0_REQ1INT		0xB040005C
-
-#define IC0_ASSIGNRD		0xB0400060
-#define IC0_ASSIGNSET		0xB0400060
-#define IC0_ASSIGNCLR		0xB0400064
-
-#define IC0_WAKERD		0xB0400068
-#define IC0_WAKESET		0xB0400068
-#define IC0_WAKECLR		0xB040006C
-
-#define IC0_MASKRD		0xB0400070
-#define IC0_MASKSET		0xB0400070
-#define IC0_MASKCLR		0xB0400074
-
-#define IC0_RISINGRD		0xB0400078
-#define IC0_RISINGCLR		0xB0400078
-#define IC0_FALLINGRD		0xB040007C
-#define IC0_FALLINGCLR		0xB040007C
-
-#define IC0_TESTBIT		0xB0400080
-
-/* Interrupt Controller 1 */
-#define IC1_CFG0RD		0xB1800040
-#define IC1_CFG0SET		0xB1800040
-#define IC1_CFG0CLR		0xB1800044
-
-#define IC1_CFG1RD		0xB1800048
-#define IC1_CFG1SET		0xB1800048
-#define IC1_CFG1CLR		0xB180004C
-
-#define IC1_CFG2RD		0xB1800050
-#define IC1_CFG2SET		0xB1800050
-#define IC1_CFG2CLR		0xB1800054
-
-#define IC1_REQ0INT		0xB1800054
-#define IC1_SRCRD		0xB1800058
-#define IC1_SRCSET		0xB1800058
-#define IC1_SRCCLR		0xB180005C
-#define IC1_REQ1INT		0xB180005C
-
-#define IC1_ASSIGNRD            0xB1800060
-#define IC1_ASSIGNSET           0xB1800060
-#define IC1_ASSIGNCLR           0xB1800064
-
-#define IC1_WAKERD		0xB1800068
-#define IC1_WAKESET		0xB1800068
-#define IC1_WAKECLR		0xB180006C
-
-#define IC1_MASKRD		0xB1800070
-#define IC1_MASKSET		0xB1800070
-#define IC1_MASKCLR		0xB1800074
-
-#define IC1_RISINGRD		0xB1800078
-#define IC1_RISINGCLR		0xB1800078
-#define IC1_FALLINGRD		0xB180007C
-#define IC1_FALLINGCLR		0xB180007C
-
-#define IC1_TESTBIT		0xB1800080
 
 
 /* Au1000 */
 #ifdef CONFIG_SOC_AU1000
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT
-
-#define AU1000_ETH0_BASE	0xB0500000
-#define AU1000_ETH1_BASE	0xB0510000
-#define AU1000_MAC0_ENABLE	0xB0520000
-#define AU1000_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1000 */
 
 /* Au1500 */
 #ifdef CONFIG_SOC_AU1500
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017fffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT
-
-#define AU1500_ETH0_BASE	0xB1500000
-#define AU1500_ETH1_BASE	0xB1510000
-#define AU1500_MAC0_ENABLE	0xB1520000
-#define AU1500_MAC1_ENABLE	0xB1520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1500 */
 
 /* Au1100 */
 #ifdef CONFIG_SOC_AU1100
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT
-
-#define AU1100_ETH0_BASE	0xB0500000
-#define AU1100_MAC0_ENABLE	0xB0520000
-#define NUM_ETH_INTERFACES 1
 #endif /* CONFIG_SOC_AU1100 */
 
 #ifdef CONFIG_SOC_AU1550
-#define UART0_ADDR		0xB1100000
 
 #define USB_OHCI_BASE		0x14020000	/* phys addr for ioremap */
 #define USB_OHCI_LEN		0x00060000
 #define USB_HOST_CONFIG 	0xB4027ffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1550_USB_HOST_INT
-
-#define AU1550_ETH0_BASE	0xB0500000
-#define AU1550_ETH1_BASE	0xB0510000
-#define AU1550_MAC0_ENABLE	0xB0520000
-#define AU1550_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1550 */
 
 
 #ifdef CONFIG_SOC_AU1200
 
-#define UART0_ADDR		0xB1100000
-
 #define USB_UOC_BASE		0x14020020
 #define USB_UOC_LEN		0x20
 #define USB_OHCI_BASE		0x14020100
@@ -1504,22 +1352,6 @@ enum soc_au1200_ints {
 #define SYS_PINFUNC_S1B 	(1 << 2)
 #endif
 
-#define SYS_TRIOUTRD		0xB1900100
-#define SYS_TRIOUTCLR		0xB1900100
-#define SYS_OUTPUTRD		0xB1900108
-#define SYS_OUTPUTSET		0xB1900108
-#define SYS_OUTPUTCLR		0xB190010C
-#define SYS_PINSTATERD		0xB1900110
-#define SYS_PININPUTEN		0xB1900110
-
-/* GPIO2, Au1500, Au1550 only */
-#define GPIO2_BASE		0xB1700000
-#define GPIO2_DIR		(GPIO2_BASE + 0)
-#define GPIO2_OUTPUT		(GPIO2_BASE + 8)
-#define GPIO2_PINSTATE		(GPIO2_BASE + 0xC)
-#define GPIO2_INTENABLE 	(GPIO2_BASE + 0x10)
-#define GPIO2_ENABLE		(GPIO2_BASE + 0x14)
-
 /* Power Management */
 #define SYS_SCRATCH0		0xB1900018
 #define SYS_SCRATCH1		0xB190001C
@@ -1635,12 +1467,6 @@ enum soc_au1200_ints {
 #  define AC97C_RS		(1 << 1)
 #  define AC97C_CE		(1 << 0)
 
-/* Secure Digital (SD) Controller */
-#define SD0_XMIT_FIFO	0xB0600000
-#define SD0_RECV_FIFO	0xB0600004
-#define SD1_XMIT_FIFO	0xB0680000
-#define SD1_RECV_FIFO	0xB0680004
-
 #if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
 /* Au1500 PCI Controller */
 #define Au1500_CFG_BASE 	0xB4005000	/* virtual, KSEG1 addr */
diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
index c333b4e1cd44..59f5b55b2200 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
@@ -37,10 +37,6 @@
 
 #define NUM_AU1000_DMA_CHANNELS	8
 
-/* DMA Channel Base Addresses */
-#define DMA_CHANNEL_BASE	0xB4002000
-#define DMA_CHANNEL_LEN		0x00000100
-
 /* DMA Channel Register Offsets */
 #define DMA_MODE_SET		0x00000000
 #define DMA_MODE_READ		DMA_MODE_SET
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
index c8a553a36ba4..2fdacfe85e23 100644
--- a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
@@ -37,14 +37,6 @@
 
 #ifndef _LANGUAGE_ASSEMBLY
 
-/*
- * The DMA base addresses.
- * The channels are every 256 bytes (0x0100) from the channel 0 base.
- * Interrupt status/enable is bits 15:0 for channels 15 to zero.
- */
-#define DDMA_GLOBAL_BASE	0xb4003000
-#define DDMA_CHANNEL_BASE	0xb4002000
-
 typedef volatile struct dbdma_global {
 	u32	ddma_config;
 	u32	ddma_intstat;
diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
index 62d2f136d941..1f41a522906d 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
@@ -24,6 +24,23 @@
 
 #define MAKE_IRQ(intc, off)	(AU1000_INTC##intc##_INT_BASE + (off))
 
+/* GPIO1 registers within SYS_ area */
+#define SYS_TRIOUTRD		0x100
+#define SYS_TRIOUTCLR		0x100
+#define SYS_OUTPUTRD		0x108
+#define SYS_OUTPUTSET		0x108
+#define SYS_OUTPUTCLR		0x10C
+#define SYS_PINSTATERD		0x110
+#define SYS_PININPUTEN		0x110
+
+/* register offsets within GPIO2 block */
+#define GPIO2_DIR		0x00
+#define GPIO2_OUTPUT		0x08
+#define GPIO2_PINSTATE		0x0C
+#define GPIO2_INTENABLE		0x10
+#define GPIO2_ENABLE		0x14
+
+struct gpio;
 
 static inline int au1000_gpio1_to_irq(int gpio)
 {
@@ -200,23 +217,26 @@ static inline int au1200_irq_to_gpio(int irq)
  */
 static inline void alchemy_gpio1_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
 	unsigned long r = v ? SYS_OUTPUTSET : SYS_OUTPUTCLR;
-	au_writel(mask, r);
-	au_sync();
+	__raw_writel(mask, base + r);
+	wmb();
 }
 
 static inline int alchemy_gpio1_get_value(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	return au_readl(SYS_PINSTATERD) & mask;
+	return __raw_readl(base + SYS_PINSTATERD) & mask;
 }
 
 static inline int alchemy_gpio1_direction_input(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	au_writel(mask, SYS_TRIOUTCLR);
-	au_sync();
+	__raw_writel(mask, base + SYS_TRIOUTCLR);
+	wmb();
 	return 0;
 }
 
@@ -257,27 +277,31 @@ static inline int alchemy_gpio1_to_irq(int gpio)
  */
 static inline void __alchemy_gpio2_mod_dir(int gpio, int to_out)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO2_BASE);
-	unsigned long d = au_readl(GPIO2_DIR);
+	unsigned long d = __raw_readl(base + GPIO2_DIR);
+
 	if (to_out)
 		d |= mask;
 	else
 		d &= ~mask;
-	au_writel(d, GPIO2_DIR);
-	au_sync();
+	__raw_writel(d, base + GPIO2_DIR);
+	wmb();
 }
 
 static inline void alchemy_gpio2_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask;
 	mask = ((v) ? 0x00010001 : 0x00010000) << (gpio - ALCHEMY_GPIO2_BASE);
-	au_writel(mask, GPIO2_OUTPUT);
-	au_sync();
+	__raw_writel(mask, base + GPIO2_OUTPUT);
+	wmb();
 }
 
 static inline int alchemy_gpio2_get_value(int gpio)
 {
-	return au_readl(GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	return __raw_readl(base + GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
 }
 
 static inline int alchemy_gpio2_direction_input(int gpio)
@@ -329,21 +353,23 @@ static inline int alchemy_gpio2_to_irq(int gpio)
  */
 static inline void alchemy_gpio1_input_enable(void)
 {
-	au_writel(0, SYS_PININPUTEN);	/* the write op is key */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
+	__raw_writel(0, base + SYS_PININPUTEN);	/* the write op is key */
+	wmb();
 }
 
 /* GPIO2 shared interrupts and control */
 
 static inline void __alchemy_gpio2_mod_int(int gpio2, int en)
 {
-	unsigned long r = au_readl(GPIO2_INTENABLE);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	unsigned long r = __raw_readl(base + GPIO2_INTENABLE);
 	if (en)
 		r |= 1 << gpio2;
 	else
 		r &= ~(1 << gpio2);
-	au_writel(r, GPIO2_INTENABLE);
-	au_sync();
+	__raw_writel(r, base + GPIO2_INTENABLE);
+	wmb();
 }
 
 /**
@@ -418,10 +444,11 @@ static inline void alchemy_gpio2_disable_int(int gpio2)
  */
 static inline void alchemy_gpio2_enable(void)
 {
-	au_writel(3, GPIO2_ENABLE);	/* reset, clock enabled */
-	au_sync();
-	au_writel(1, GPIO2_ENABLE);	/* clock enabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(3, base + GPIO2_ENABLE);	/* reset, clock enabled */
+	wmb();
+	__raw_writel(1, base + GPIO2_ENABLE);	/* clock enabled */
+	wmb();
 }
 
 /**
@@ -431,8 +458,9 @@ static inline void alchemy_gpio2_enable(void)
  */
 static inline void alchemy_gpio2_disable(void)
 {
-	au_writel(2, GPIO2_ENABLE);	/* reset, clock disabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(2, base + GPIO2_ENABLE);	/* reset, clock disabled */
+	wmb();
 }
 
 /**********************************************************************/
@@ -556,6 +584,16 @@ static inline void gpio_set_value(int gpio, int v)
 	alchemy_gpio_set_value(gpio, v);
 }
 
+static inline int gpio_get_value_cansleep(unsigned gpio)
+{
+	return gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	gpio_set_value(gpio, value);
+}
+
 static inline int gpio_is_valid(int gpio)
 {
 	return alchemy_gpio_is_valid(gpio);
@@ -581,10 +619,50 @@ static inline int gpio_request(unsigned gpio, const char *label)
 	return 0;
 }
 
+static inline int gpio_request_one(unsigned gpio,
+					unsigned long flags, const char *label)
+{
+	return 0;
+}
+
+static inline int gpio_request_array(struct gpio *array, size_t num)
+{
+	return 0;
+}
+
 static inline void gpio_free(unsigned gpio)
 {
 }
 
+static inline void gpio_free_array(struct gpio *array, size_t num)
+{
+}
+
+static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export_link(struct device *dev, const char *name,
+				   unsigned gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+
 #endif	/* !CONFIG_ALCHEMY_GPIO_INDIRECT */
 
 
diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h
index 9759588ba3cf..184d5ecb5f51 100644
--- a/arch/mips/include/asm/mach-bcm47xx/nvram.h
+++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h
@@ -39,8 +39,16 @@ extern int nvram_getenv(char *name, char *val, size_t val_len);
 
 static inline void nvram_parse_macaddr(char *buf, u8 *macaddr)
 {
-	sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1],
-	       &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]);
+	if (strchr(buf, ':'))
+		sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else if (strchr(buf, '-'))
+		sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else
+		printk(KERN_WARNING "Can not parse mac address: %s\n", buf);
 }
 
 #endif
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
index 32978d32561a..ed72e6a26b73 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
@@ -88,7 +88,7 @@ struct bcm_tag {
 	char kernel_crc[CRC_LEN];
 	/* 228-235: Unused at present */
 	char reserved1[8];
-	/* 236-239: CRC32 of header excluding tagVersion */
+	/* 236-239: CRC32 of header excluding last 20 bytes */
 	char header_crc[CRC_LEN];
 	/* 240-255: Unused at present */
 	char reserved2[16];
diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index 0b2b5eb22e9b..dedef7d2b01f 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@@ -63,6 +63,11 @@
 	# CN30XX Disable instruction prefetching
 	or  v0, v0, 0x2000
 skip:
+	# First clear off CvmCtl[IPPCI] bit and move the performance
+	# counters interrupt to IRQ 6
+	li	v1, ~(7 << 7)
+	and	v0, v0, v1
+	ori	v0, v0, (6 << 7)
 	# Write the cavium control register
 	dmtc0   v0, CP0_CVMCTL_REG
 	sync
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq.h b/arch/mips/include/asm/mach-lantiq/lantiq.h
new file mode 100644
index 000000000000..ce2f02929d22
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq.h
@@ -0,0 +1,63 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#ifndef _LANTIQ_H__
+#define _LANTIQ_H__
+
+#include <linux/irq.h>
+
+/* generic reg access functions */
+#define ltq_r32(reg)		__raw_readl(reg)
+#define ltq_w32(val, reg)	__raw_writel(val, reg)
+#define ltq_w32_mask(clear, set, reg)	\
+	ltq_w32((ltq_r32(reg) & ~(clear)) | (set), reg)
+#define ltq_r8(reg)		__raw_readb(reg)
+#define ltq_w8(val, reg)	__raw_writeb(val, reg)
+
+/* register access macros for EBU and CGU */
+#define ltq_ebu_w32(x, y)	ltq_w32((x), ltq_ebu_membase + (y))
+#define ltq_ebu_r32(x)		ltq_r32(ltq_ebu_membase + (x))
+#define ltq_cgu_w32(x, y)	ltq_w32((x), ltq_cgu_membase + (y))
+#define ltq_cgu_r32(x)		ltq_r32(ltq_cgu_membase + (x))
+
+extern __iomem void *ltq_ebu_membase;
+extern __iomem void *ltq_cgu_membase;
+
+extern unsigned int ltq_get_cpu_ver(void);
+extern unsigned int ltq_get_soc_type(void);
+
+/* clock speeds */
+#define CLOCK_60M	60000000
+#define CLOCK_83M	83333333
+#define CLOCK_111M	111111111
+#define CLOCK_133M	133333333
+#define CLOCK_167M	166666667
+#define CLOCK_200M	200000000
+#define CLOCK_266M	266666666
+#define CLOCK_333M	333333333
+#define CLOCK_400M	400000000
+
+/* spinlock all ebu i/o */
+extern spinlock_t ebu_lock;
+
+/* some irq helpers */
+extern void ltq_disable_irq(struct irq_data *data);
+extern void ltq_mask_and_ack_irq(struct irq_data *data);
+extern void ltq_enable_irq(struct irq_data *data);
+
+/* find out what caused the last cpu reset */
+extern int ltq_reset_cause(void);
+#define LTQ_RST_CAUSE_WDTRST	0x20
+
+#define IOPORT_RESOURCE_START	0x10000000
+#define IOPORT_RESOURCE_END	0xffffffff
+#define IOMEM_RESOURCE_START	0x10000000
+#define IOMEM_RESOURCE_END	0xffffffff
+#define LTQ_FLASH_START		0x10000000
+#define LTQ_FLASH_MAX		0x04000000
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
new file mode 100644
index 000000000000..a305f1d0259e
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
@@ -0,0 +1,53 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_PLATFORM_H__
+#define _LANTIQ_PLATFORM_H__
+
+#include <linux/mtd/partitions.h>
+#include <linux/socket.h>
+
+/* struct used to pass info to the pci core */
+enum {
+	PCI_CLOCK_INT = 0,
+	PCI_CLOCK_EXT
+};
+
+#define PCI_EXIN0	0x0001
+#define PCI_EXIN1	0x0002
+#define PCI_EXIN2	0x0004
+#define PCI_EXIN3	0x0008
+#define PCI_EXIN4	0x0010
+#define PCI_EXIN5	0x0020
+#define PCI_EXIN_MAX	6
+
+#define PCI_GNT1	0x0040
+#define PCI_GNT2	0x0080
+#define PCI_GNT3	0x0100
+#define PCI_GNT4	0x0200
+
+#define PCI_REQ1	0x0400
+#define PCI_REQ2	0x0800
+#define PCI_REQ3	0x1000
+#define PCI_REQ4	0x2000
+#define PCI_REQ_SHIFT	10
+#define PCI_REQ_MASK	0xf
+
+struct ltq_pci_data {
+	int clock;
+	int gpio;
+	int irq[16];
+};
+
+/* struct used to pass info to network drivers */
+struct ltq_eth_data {
+	struct sockaddr mac;
+	int mii_mode;
+};
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/war.h b/arch/mips/include/asm/mach-lantiq/war.h
new file mode 100644
index 000000000000..01b08ef368d1
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/war.h
@@ -0,0 +1,24 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#ifndef __ASM_MIPS_MACH_LANTIQ_WAR_H
+#define __ASM_MIPS_MACH_LANTIQ_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR     0
+#define R4600_V1_HIT_CACHEOP_WAR        0
+#define R4600_V2_HIT_CACHEOP_WAR        0
+#define R5432_CP0_INTERRUPT_WAR         0
+#define BCM1250_M3_WAR                  0
+#define SIBYTE_1956_WAR                 0
+#define MIPS4K_ICACHE_REFILL_WAR        0
+#define MIPS_CACHE_SYNC_WAR             0
+#define TX49XX_ICACHE_INDEX_INV_WAR     0
+#define RM9000_CDEX_SMP_WAR             0
+#define ICACHE_REFILLS_WORKAROUND_WAR   0
+#define R10000_LLSC_WAR                 0
+#define MIPS34K_MISSED_ITLB_WAR         0
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/irq.h b/arch/mips/include/asm/mach-lantiq/xway/irq.h
new file mode 100644
index 000000000000..a1471d2dd0d2
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/irq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef __LANTIQ_IRQ_H
+#define __LANTIQ_IRQ_H
+
+#include <lantiq_irq.h>
+
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
new file mode 100644
index 000000000000..b4465a888e20
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
@@ -0,0 +1,66 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_XWAY_IRQ_H__
+#define _LANTIQ_XWAY_IRQ_H__
+
+#define INT_NUM_IRQ0		8
+#define INT_NUM_IM0_IRL0	(INT_NUM_IRQ0 + 0)
+#define INT_NUM_IM1_IRL0	(INT_NUM_IRQ0 + 32)
+#define INT_NUM_IM2_IRL0	(INT_NUM_IRQ0 + 64)
+#define INT_NUM_IM3_IRL0	(INT_NUM_IRQ0 + 96)
+#define INT_NUM_IM4_IRL0	(INT_NUM_IRQ0 + 128)
+#define INT_NUM_IM_OFFSET	(INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)
+
+#define LTQ_ASC_TIR(x)		(INT_NUM_IM3_IRL0 + ((x) * 8))
+#define LTQ_ASC_RIR(x)		(INT_NUM_IM3_IRL0 + ((x) * 8) + 1)
+#define LTQ_ASC_EIR(x)		(INT_NUM_IM3_IRL0 + ((x) * 8) + 2)
+
+#define LTQ_ASC_ASE_TIR		INT_NUM_IM2_IRL0
+#define LTQ_ASC_ASE_RIR		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_ASC_ASE_EIR		(INT_NUM_IM2_IRL0 + 3)
+
+#define LTQ_SSC_TIR		(INT_NUM_IM0_IRL0 + 15)
+#define LTQ_SSC_RIR		(INT_NUM_IM0_IRL0 + 14)
+#define LTQ_SSC_EIR		(INT_NUM_IM0_IRL0 + 16)
+
+#define LTQ_MEI_DYING_GASP_INT	(INT_NUM_IM1_IRL0 + 21)
+#define LTQ_MEI_INT		(INT_NUM_IM1_IRL0 + 23)
+
+#define LTQ_TIMER6_INT		(INT_NUM_IM1_IRL0 + 23)
+#define LTQ_USB_INT		(INT_NUM_IM1_IRL0 + 22)
+#define LTQ_USB_OC_INT		(INT_NUM_IM4_IRL0 + 23)
+
+#define MIPS_CPU_TIMER_IRQ		7
+
+#define LTQ_DMA_CH0_INT		(INT_NUM_IM2_IRL0)
+#define LTQ_DMA_CH1_INT		(INT_NUM_IM2_IRL0 + 1)
+#define LTQ_DMA_CH2_INT		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_DMA_CH3_INT		(INT_NUM_IM2_IRL0 + 3)
+#define LTQ_DMA_CH4_INT		(INT_NUM_IM2_IRL0 + 4)
+#define LTQ_DMA_CH5_INT		(INT_NUM_IM2_IRL0 + 5)
+#define LTQ_DMA_CH6_INT		(INT_NUM_IM2_IRL0 + 6)
+#define LTQ_DMA_CH7_INT		(INT_NUM_IM2_IRL0 + 7)
+#define LTQ_DMA_CH8_INT		(INT_NUM_IM2_IRL0 + 8)
+#define LTQ_DMA_CH9_INT		(INT_NUM_IM2_IRL0 + 9)
+#define LTQ_DMA_CH10_INT	(INT_NUM_IM2_IRL0 + 10)
+#define LTQ_DMA_CH11_INT	(INT_NUM_IM2_IRL0 + 11)
+#define LTQ_DMA_CH12_INT	(INT_NUM_IM2_IRL0 + 25)
+#define LTQ_DMA_CH13_INT	(INT_NUM_IM2_IRL0 + 26)
+#define LTQ_DMA_CH14_INT	(INT_NUM_IM2_IRL0 + 27)
+#define LTQ_DMA_CH15_INT	(INT_NUM_IM2_IRL0 + 28)
+#define LTQ_DMA_CH16_INT	(INT_NUM_IM2_IRL0 + 29)
+#define LTQ_DMA_CH17_INT	(INT_NUM_IM2_IRL0 + 30)
+#define LTQ_DMA_CH18_INT	(INT_NUM_IM2_IRL0 + 16)
+#define LTQ_DMA_CH19_INT	(INT_NUM_IM2_IRL0 + 21)
+
+#define LTQ_PPE_MBOX_INT	(INT_NUM_IM2_IRL0 + 24)
+
+#define INT_NUM_IM4_IRL14	(INT_NUM_IM4_IRL0 + 14)
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
new file mode 100644
index 000000000000..8a3c6be669d2
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -0,0 +1,141 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_XWAY_H__
+#define _LTQ_XWAY_H__
+
+#ifdef CONFIG_SOC_TYPE_XWAY
+
+#include <lantiq.h>
+
+/* Chip IDs */
+#define SOC_ID_DANUBE1		0x129
+#define SOC_ID_DANUBE2		0x12B
+#define SOC_ID_TWINPASS		0x12D
+#define SOC_ID_AMAZON_SE	0x152
+#define SOC_ID_ARX188		0x16C
+#define SOC_ID_ARX168		0x16D
+#define SOC_ID_ARX182		0x16F
+
+/* SoC Types */
+#define SOC_TYPE_DANUBE		0x01
+#define SOC_TYPE_TWINPASS	0x02
+#define SOC_TYPE_AR9		0x03
+#define SOC_TYPE_VR9		0x04
+#define SOC_TYPE_AMAZON_SE	0x05
+
+/* ASC0/1 - serial port */
+#define LTQ_ASC0_BASE_ADDR	0x1E100400
+#define LTQ_ASC1_BASE_ADDR	0x1E100C00
+#define LTQ_ASC_SIZE		0x400
+
+/* RCU - reset control unit */
+#define LTQ_RCU_BASE_ADDR	0x1F203000
+#define LTQ_RCU_SIZE		0x1000
+
+/* GPTU - general purpose timer unit */
+#define LTQ_GPTU_BASE_ADDR	0x18000300
+#define LTQ_GPTU_SIZE		0x100
+
+/* EBU - external bus unit */
+#define LTQ_EBU_GPIO_START	0x14000000
+#define LTQ_EBU_GPIO_SIZE	0x1000
+
+#define LTQ_EBU_BASE_ADDR	0x1E105300
+#define LTQ_EBU_SIZE		0x100
+
+#define LTQ_EBU_BUSCON0		0x0060
+#define LTQ_EBU_PCC_CON		0x0090
+#define LTQ_EBU_PCC_IEN		0x00A4
+#define LTQ_EBU_PCC_ISTAT	0x00A0
+#define LTQ_EBU_BUSCON1		0x0064
+#define LTQ_EBU_ADDRSEL1	0x0024
+#define EBU_WRDIS		0x80000000
+
+/* CGU - clock generation unit */
+#define LTQ_CGU_BASE_ADDR	0x1F103000
+#define LTQ_CGU_SIZE		0x1000
+
+/* ICU - interrupt control unit */
+#define LTQ_ICU_BASE_ADDR	0x1F880200
+#define LTQ_ICU_SIZE		0x100
+
+/* EIU - external interrupt unit */
+#define LTQ_EIU_BASE_ADDR	0x1F101000
+#define LTQ_EIU_SIZE		0x1000
+
+/* PMU - power management unit */
+#define LTQ_PMU_BASE_ADDR	0x1F102000
+#define LTQ_PMU_SIZE		0x1000
+
+#define PMU_DMA			0x0020
+#define PMU_USB			0x8041
+#define PMU_LED			0x0800
+#define PMU_GPT			0x1000
+#define PMU_PPE			0x2000
+#define PMU_FPI			0x4000
+#define PMU_SWITCH		0x10000000
+
+/* ETOP - ethernet */
+#define LTQ_ETOP_BASE_ADDR	0x1E180000
+#define LTQ_ETOP_SIZE		0x40000
+
+/* DMA */
+#define LTQ_DMA_BASE_ADDR	0x1E104100
+#define LTQ_DMA_SIZE		0x800
+
+/* PCI */
+#define PCI_CR_BASE_ADDR	0x1E105400
+#define PCI_CR_SIZE		0x400
+
+/* WDT */
+#define LTQ_WDT_BASE_ADDR	0x1F8803F0
+#define LTQ_WDT_SIZE		0x10
+
+/* STP - serial to parallel conversion unit */
+#define LTQ_STP_BASE_ADDR	0x1E100BB0
+#define LTQ_STP_SIZE		0x40
+
+/* GPIO */
+#define LTQ_GPIO0_BASE_ADDR	0x1E100B10
+#define LTQ_GPIO1_BASE_ADDR	0x1E100B40
+#define LTQ_GPIO2_BASE_ADDR	0x1E100B70
+#define LTQ_GPIO_SIZE		0x30
+
+/* SSC */
+#define LTQ_SSC_BASE_ADDR	0x1e100800
+#define LTQ_SSC_SIZE		0x100
+
+/* MEI - dsl core */
+#define LTQ_MEI_BASE_ADDR	0x1E116000
+
+/* DEU - data encryption unit */
+#define LTQ_DEU_BASE_ADDR	0x1E103100
+
+/* MPS - multi processor unit (voice) */
+#define LTQ_MPS_BASE_ADDR	(KSEG1 + 0x1F107000)
+#define LTQ_MPS_CHIPID		((u32 *)(LTQ_MPS_BASE_ADDR + 0x0344))
+
+/* request a non-gpio and set the PIO config */
+extern int  ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name);
+extern void ltq_pmu_enable(unsigned int module);
+extern void ltq_pmu_disable(unsigned int module);
+
+static inline int ltq_is_ar9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_AR9);
+}
+
+static inline int ltq_is_vr9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_VR9);
+}
+
+#endif /* CONFIG_SOC_TYPE_XWAY */
+#endif /* _LTQ_XWAY_H__ */
diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
new file mode 100644
index 000000000000..872943a4b90e
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -0,0 +1,60 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef LTQ_DMA_H__
+#define LTQ_DMA_H__
+
+#define LTQ_DESC_SIZE		0x08	/* each descriptor is 64bit */
+#define LTQ_DESC_NUM		0x40	/* 64 descriptors / channel */
+
+#define LTQ_DMA_OWN		BIT(31)	/* owner bit */
+#define LTQ_DMA_C		BIT(30) /* complete bit */
+#define LTQ_DMA_SOP		BIT(29) /* start of packet */
+#define LTQ_DMA_EOP		BIT(28) /* end of packet */
+#define LTQ_DMA_TX_OFFSET(x)	(((x) & 0x1f) << 23) /* data bytes offset */
+#define LTQ_DMA_RX_OFFSET(x)	(((x) & 0x7) << 23) /* data bytes offset */
+#define LTQ_DMA_SIZE_MASK	(0xffff) /* the size field is 16 bit */
+
+struct ltq_dma_desc {
+	u32 ctl;
+	u32 addr;
+};
+
+struct ltq_dma_channel {
+	int nr;				/* the channel number */
+	int irq;			/* the mapped irq */
+	int desc;			/* the current descriptor */
+	struct ltq_dma_desc *desc_base;	/* the descriptor base */
+	int phys;			/* physical addr */
+};
+
+enum {
+	DMA_PORT_ETOP = 0,
+	DMA_PORT_DEU,
+};
+
+extern void ltq_dma_enable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_disable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_ack_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_open(struct ltq_dma_channel *ch);
+extern void ltq_dma_close(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_tx(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_rx(struct ltq_dma_channel *ch);
+extern void ltq_dma_free(struct ltq_dma_channel *ch);
+extern void ltq_dma_init_port(int p);
+
+#endif
diff --git a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
new file mode 100644
index 000000000000..3b728275b9b0
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
@@ -0,0 +1,47 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems
+ * Copyright (C) 2003 Ralf Baechle
+ */
+#ifndef __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_4kex		1
+#define cpu_has_4k_cache	1
+#define cpu_has_watch		1
+#define cpu_has_mips16		0
+#define cpu_has_counter		1
+#define cpu_has_divec		1
+#define cpu_has_vce		0
+#define cpu_has_cache_cdex_p	0
+#define cpu_has_cache_cdex_s	0
+#define cpu_has_prefetch	1
+#define cpu_has_mcheck		1
+#define cpu_has_ejtag		1
+
+#define cpu_has_llsc		1
+#define cpu_has_vtag_icache	0
+#define cpu_has_dc_aliases	0
+#define cpu_has_ic_fills_f_dc	0
+#define cpu_has_dsp		0
+#define cpu_has_mipsmt		0
+#define cpu_has_userlocal	0
+#define cpu_icache_snoops_remote_store	0
+
+#define cpu_has_nofpuex		0
+#define cpu_has_64bits		1
+
+#define cpu_has_mips32r1	1
+#define cpu_has_mips32r2	0
+#define cpu_has_mips64r1	1
+#define cpu_has_mips64r2	0
+
+#define cpu_has_inclusive_pcaches	0
+
+#define cpu_dcache_line_size()	32
+#define cpu_icache_line_size()	32
+
+#endif /* __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-netlogic/irq.h b/arch/mips/include/asm/mach-netlogic/irq.h
new file mode 100644
index 000000000000..b5902458e7c1
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/irq.h
@@ -0,0 +1,14 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ */
+#ifndef __ASM_NETLOGIC_IRQ_H
+#define __ASM_NETLOGIC_IRQ_H
+
+#define NR_IRQS			64
+#define MIPS_CPU_IRQ_BASE	0
+
+#endif /* __ASM_NETLOGIC_IRQ_H */
diff --git a/arch/mips/include/asm/mach-netlogic/war.h b/arch/mips/include/asm/mach-netlogic/war.h
new file mode 100644
index 000000000000..22da89327352
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/war.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_MACH_NLM_WAR_H
+#define __ASM_MIPS_MACH_NLM_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define RM9000_CDEX_SMP_WAR		0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_NLM_WAR_H */
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index d94085a3eafb..bc01a02cacd8 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -118,6 +118,8 @@ search_module_dbetables(unsigned long addr)
 #define MODULE_PROC_FAMILY "LOONGSON2 "
 #elif defined CONFIG_CPU_CAVIUM_OCTEON
 #define MODULE_PROC_FAMILY "OCTEON "
+#elif defined CONFIG_CPU_XLR
+#define MODULE_PROC_FAMILY "XLR "
 #else
 #error MODULE_PROC_FAMILY undefined for your processor configuration
 #endif
diff --git a/arch/mips/include/asm/netlogic/interrupt.h b/arch/mips/include/asm/netlogic/interrupt.h
new file mode 100644
index 000000000000..a85aadb6cfd7
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/interrupt.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_INTERRUPT_H
+#define _ASM_NLM_INTERRUPT_H
+
+/* Defines for the IRQ numbers */
+
+#define IRQ_IPI_SMP_FUNCTION	3
+#define IRQ_IPI_SMP_RESCHEDULE	4
+#define IRQ_MSGRING		6
+#define IRQ_TIMER		7
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
new file mode 100644
index 000000000000..8c53d0ba4bf2
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/mips-extns.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_MIPS_EXTS_H
+#define _ASM_NLM_MIPS_EXTS_H
+
+/*
+ * XLR and XLP interrupt request and interrupt mask registers
+ */
+#define read_c0_eirr()		__read_64bit_c0_register($9, 6)
+#define read_c0_eimr()		__read_64bit_c0_register($9, 7)
+#define write_c0_eirr(val)	__write_64bit_c0_register($9, 6, val)
+
+/*
+ * Writing EIMR in 32 bit is a special case: the lower 8 bits of the
+ * EIMR are shadowed in the status register, so we cannot simply save
+ * and restore the status register around the split write.
+ */
+#define write_c0_eimr(val)						\
+do {									\
+	if (sizeof(unsigned long) == 4)	{				\
+		unsigned long __flags;					\
+									\
+		local_irq_save(__flags);				\
+		__asm__ __volatile__(					\
+			".set\tmips64\n\t"				\
+			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsrl\t%L0, %L0, 32\n\t"			\
+			"dsll\t%M0, %M0, 32\n\t"			\
+			"or\t%L0, %L0, %M0\n\t"				\
+			"dmtc0\t%L0, $9, 7\n\t"				\
+			".set\tmips0"					\
+			: : "r" (val));					\
+		__flags = (__flags & 0xffff00ff) | (((val) & 0xff) << 8);\
+		local_irq_restore(__flags);				\
+	} else								\
+		__write_64bit_c0_register($9, 7, (val));		\
+} while (0)
+
+static inline int hard_smp_processor_id(void)
+{
+	return __read_32bit_c0_register($15, 1) & 0x3ff;
+}
+
+#endif /*_ASM_NLM_MIPS_EXTS_H */
diff --git a/arch/mips/include/asm/netlogic/psb-bootinfo.h b/arch/mips/include/asm/netlogic/psb-bootinfo.h
new file mode 100644
index 000000000000..6878307f0ee6
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/psb-bootinfo.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NETLOGIC_BOOTINFO_H
+#define _ASM_NETLOGIC_BOOTINFO_H
+
+struct psb_info {
+	uint64_t boot_level;
+	uint64_t io_base;
+	uint64_t output_device;
+	uint64_t uart_print;
+	uint64_t led_output;
+	uint64_t init;
+	uint64_t exit;
+	uint64_t warm_reset;
+	uint64_t wakeup;
+	uint64_t online_cpu_map;
+	uint64_t master_reentry_sp;
+	uint64_t master_reentry_gp;
+	uint64_t master_reentry_fn;
+	uint64_t slave_reentry_fn;
+	uint64_t magic_dword;
+	uint64_t uart_putchar;
+	uint64_t size;
+	uint64_t uart_getchar;
+	uint64_t nmi_handler;
+	uint64_t psb_version;
+	uint64_t mac_addr;
+	uint64_t cpu_frequency;
+	uint64_t board_version;
+	uint64_t malloc;
+	uint64_t free;
+	uint64_t global_shmem_addr;
+	uint64_t global_shmem_size;
+	uint64_t psb_os_cpu_map;
+	uint64_t userapp_cpu_map;
+	uint64_t wakeup_os;
+	uint64_t psb_mem_map;
+	uint64_t board_major_version;
+	uint64_t board_minor_version;
+	uint64_t board_manf_revision;
+	uint64_t board_serial_number;
+	uint64_t psb_physaddr_map;
+	uint64_t xlr_loaderip_config;
+	uint64_t bldr_envp;
+	uint64_t avail_mem_map;
+};
+
+enum {
+	NETLOGIC_IO_SPACE = 0x10,
+	PCIX_IO_SPACE,
+	PCIX_CFG_SPACE,
+	PCIX_MEMORY_SPACE,
+	HT_IO_SPACE,
+	HT_CFG_SPACE,
+	HT_MEMORY_SPACE,
+	SRAM_SPACE,
+	FLASH_CONTROLLER_SPACE
+};
+
+#define NLM_MAX_ARGS	64
+#define NLM_MAX_ENVS	32
+
+/* This is what netlboot passes and linux boot_mem_map is subtly different */
+#define NLM_BOOT_MEM_MAP_MAX	32
+struct nlm_boot_mem_map {
+	int nr_map;
+	struct nlm_boot_mem_map_entry {
+		uint64_t addr;		/* start of memory segment */
+		uint64_t size;		/* size of memory segment */
+		uint32_t type;		/* type of memory segment */
+	} map[NLM_BOOT_MEM_MAP_MAX];
+};
+
+/* Pointer to saved boot loader info */
+extern struct psb_info nlm_prom_info;
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/gpio.h b/arch/mips/include/asm/netlogic/xlr/gpio.h
new file mode 100644
index 000000000000..51f6ad4aeb14
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/gpio.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_GPIO_H
+#define _ASM_NLM_GPIO_H
+
+#define NETLOGIC_GPIO_INT_EN_REG		0
+#define NETLOGIC_GPIO_INPUT_INVERSION_REG	1
+#define NETLOGIC_GPIO_IO_DIR_REG		2
+#define NETLOGIC_GPIO_IO_DATA_WR_REG		3
+#define NETLOGIC_GPIO_IO_DATA_RD_REG		4
+
+#define NETLOGIC_GPIO_SWRESET_REG		8
+#define NETLOGIC_GPIO_DRAM1_CNTRL_REG		9
+#define NETLOGIC_GPIO_DRAM1_RATIO_REG		10
+#define NETLOGIC_GPIO_DRAM1_RESET_REG		11
+#define NETLOGIC_GPIO_DRAM1_STATUS_REG		12
+#define NETLOGIC_GPIO_DRAM2_CNTRL_REG		13
+#define NETLOGIC_GPIO_DRAM2_RATIO_REG		14
+#define NETLOGIC_GPIO_DRAM2_RESET_REG		15
+#define NETLOGIC_GPIO_DRAM2_STATUS_REG		16
+
+#define NETLOGIC_GPIO_PWRON_RESET_CFG_REG	21
+#define NETLOGIC_GPIO_BIST_ALL_GO_STATUS_REG	24
+#define NETLOGIC_GPIO_BIST_CPU_GO_STATUS_REG	25
+#define NETLOGIC_GPIO_BIST_DEV_GO_STATUS_REG	26
+
+#define NETLOGIC_GPIO_FUSE_BANK_REG		35
+#define NETLOGIC_GPIO_CPU_RESET_REG		40
+#define NETLOGIC_GPIO_RNG_REG			43
+
+#define NETLOGIC_PWRON_RESET_PCMCIA_BOOT	17
+#define NETLOGIC_GPIO_LED_BITMAP	0x1700000
+#define NETLOGIC_GPIO_LED_0_SHIFT		20
+#define NETLOGIC_GPIO_LED_1_SHIFT		24
+
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_RESET	0x01
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_HARD_RESET 0x02
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_SOFT_RESET 0x03
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_MAIN	0x04
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/iomap.h b/arch/mips/include/asm/netlogic/xlr/iomap.h
new file mode 100644
index 000000000000..2e3a4dd53045
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/iomap.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_IOMAP_H
+#define _ASM_NLM_IOMAP_H
+
+#define DEFAULT_NETLOGIC_IO_BASE           CKSEG1ADDR(0x1ef00000)
+#define NETLOGIC_IO_DDR2_CHN0_OFFSET       0x01000
+#define NETLOGIC_IO_DDR2_CHN1_OFFSET       0x02000
+#define NETLOGIC_IO_DDR2_CHN2_OFFSET       0x03000
+#define NETLOGIC_IO_DDR2_CHN3_OFFSET       0x04000
+#define NETLOGIC_IO_PIC_OFFSET             0x08000
+#define NETLOGIC_IO_UART_0_OFFSET          0x14000
+#define NETLOGIC_IO_UART_1_OFFSET          0x15100
+
+#define NETLOGIC_IO_SIZE                   0x1000
+
+#define NETLOGIC_IO_BRIDGE_OFFSET          0x00000
+
+#define NETLOGIC_IO_RLD2_CHN0_OFFSET       0x05000
+#define NETLOGIC_IO_RLD2_CHN1_OFFSET       0x06000
+
+#define NETLOGIC_IO_SRAM_OFFSET            0x07000
+
+#define NETLOGIC_IO_PCIX_OFFSET            0x09000
+#define NETLOGIC_IO_HT_OFFSET              0x0A000
+
+#define NETLOGIC_IO_SECURITY_OFFSET        0x0B000
+
+#define NETLOGIC_IO_GMAC_0_OFFSET          0x0C000
+#define NETLOGIC_IO_GMAC_1_OFFSET          0x0D000
+#define NETLOGIC_IO_GMAC_2_OFFSET          0x0E000
+#define NETLOGIC_IO_GMAC_3_OFFSET          0x0F000
+
+/* XLS devices */
+#define NETLOGIC_IO_GMAC_4_OFFSET          0x20000
+#define NETLOGIC_IO_GMAC_5_OFFSET          0x21000
+#define NETLOGIC_IO_GMAC_6_OFFSET          0x22000
+#define NETLOGIC_IO_GMAC_7_OFFSET          0x23000
+
+#define NETLOGIC_IO_PCIE_0_OFFSET          0x1E000
+#define NETLOGIC_IO_PCIE_1_OFFSET          0x1F000
+#define NETLOGIC_IO_SRIO_0_OFFSET          0x1E000
+#define NETLOGIC_IO_SRIO_1_OFFSET          0x1F000
+
+#define NETLOGIC_IO_USB_0_OFFSET           0x24000
+#define NETLOGIC_IO_USB_1_OFFSET           0x25000
+
+#define NETLOGIC_IO_COMP_OFFSET            0x1D000
+/* end XLS devices */
+
+/* XLR devices */
+#define NETLOGIC_IO_SPI4_0_OFFSET          0x10000
+#define NETLOGIC_IO_XGMAC_0_OFFSET         0x11000
+#define NETLOGIC_IO_SPI4_1_OFFSET          0x12000
+#define NETLOGIC_IO_XGMAC_1_OFFSET         0x13000
+/* end XLR devices */
+
+#define NETLOGIC_IO_I2C_0_OFFSET           0x16000
+#define NETLOGIC_IO_I2C_1_OFFSET           0x17000
+
+#define NETLOGIC_IO_GPIO_OFFSET            0x18000
+#define NETLOGIC_IO_FLASH_OFFSET           0x19000
+#define NETLOGIC_IO_TB_OFFSET              0x1C000
+
+#define NETLOGIC_CPLD_OFFSET               KSEG1ADDR(0x1d840000)
+
+/*
+ * Base Address (Virtual) of the PCI Config address space
+ * For now, choose 256M phys in kseg1 = 0xA0000000 + (1<<28)
+ * Config space spans 256 (num of buses) * 256 (num functions) * 256 bytes
+ * ie 1<<24 = 16M
+ */
+#define DEFAULT_PCI_CONFIG_BASE         0x18000000
+#define DEFAULT_HT_TYPE0_CFG_BASE       0x16000000
+#define DEFAULT_HT_TYPE1_CFG_BASE       0x17000000
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef volatile __u32 nlm_reg_t;
+extern unsigned long netlogic_io_base;
+
+/* FIXME read once in write_reg */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define netlogic_read_reg(base, offset)		((base)[(offset)])
+#define netlogic_write_reg(base, offset, value)	((base)[(offset)] = (value))
+#else
+#define netlogic_read_reg(base, offset)		(be32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg(base, offset, value) \
+				((base)[(offset)] = cpu_to_be32((value)))
+#endif
+
+#define netlogic_read_reg_le32(base, offset) (le32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg_le32(base, offset, value) \
+				((base)[(offset)] = cpu_to_le32((value)))
+#define netlogic_io_mmio(offset) ((nlm_reg_t *)(netlogic_io_base+(offset)))
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/pic.h b/arch/mips/include/asm/netlogic/xlr/pic.h
new file mode 100644
index 000000000000..5cceb746f080
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/pic.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_PIC_H
+#define _ASM_NLM_XLR_PIC_H
+
+#define PIC_CLKS_PER_SEC		66666666ULL
+/* PIC hardware interrupt numbers */
+#define PIC_IRT_WD_INDEX		0
+#define PIC_IRT_TIMER_0_INDEX		1
+#define PIC_IRT_TIMER_1_INDEX		2
+#define PIC_IRT_TIMER_2_INDEX		3
+#define PIC_IRT_TIMER_3_INDEX		4
+#define PIC_IRT_TIMER_4_INDEX		5
+#define PIC_IRT_TIMER_5_INDEX		6
+#define PIC_IRT_TIMER_6_INDEX		7
+#define PIC_IRT_TIMER_7_INDEX		8
+#define PIC_IRT_CLOCK_INDEX		PIC_IRT_TIMER_7_INDEX
+#define PIC_IRT_UART_0_INDEX		9
+#define PIC_IRT_UART_1_INDEX		10
+#define PIC_IRT_I2C_0_INDEX		11
+#define PIC_IRT_I2C_1_INDEX		12
+#define PIC_IRT_PCMCIA_INDEX		13
+#define PIC_IRT_GPIO_INDEX		14
+#define PIC_IRT_HYPER_INDEX		15
+#define PIC_IRT_PCIX_INDEX		16
+/* XLS */
+#define PIC_IRT_CDE_INDEX		15
+#define PIC_IRT_BRIDGE_TB_XLS_INDEX	16
+/* end XLS */
+#define PIC_IRT_GMAC0_INDEX		17
+#define PIC_IRT_GMAC1_INDEX		18
+#define PIC_IRT_GMAC2_INDEX		19
+#define PIC_IRT_GMAC3_INDEX		20
+#define PIC_IRT_XGS0_INDEX		21
+#define PIC_IRT_XGS1_INDEX		22
+#define PIC_IRT_HYPER_FATAL_INDEX	23
+#define PIC_IRT_PCIX_FATAL_INDEX	24
+#define PIC_IRT_BRIDGE_AERR_INDEX	25
+#define PIC_IRT_BRIDGE_BERR_INDEX	26
+#define PIC_IRT_BRIDGE_TB_XLR_INDEX	27
+#define PIC_IRT_BRIDGE_AERR_NMI_INDEX	28
+/* XLS */
+#define PIC_IRT_GMAC4_INDEX		21
+#define PIC_IRT_GMAC5_INDEX		22
+#define PIC_IRT_GMAC6_INDEX		23
+#define PIC_IRT_GMAC7_INDEX		24
+#define PIC_IRT_BRIDGE_ERR_INDEX	25
+#define PIC_IRT_PCIE_LINK0_INDEX	26
+#define PIC_IRT_PCIE_LINK1_INDEX	27
+#define PIC_IRT_PCIE_LINK2_INDEX	23
+#define PIC_IRT_PCIE_LINK3_INDEX	24
+#define PIC_IRT_PCIE_XLSB0_LINK2_INDEX	28
+#define PIC_IRT_PCIE_XLSB0_LINK3_INDEX	29
+#define PIC_IRT_SRIO_LINK0_INDEX	26
+#define PIC_IRT_SRIO_LINK1_INDEX	27
+#define PIC_IRT_SRIO_LINK2_INDEX	28
+#define PIC_IRT_SRIO_LINK3_INDEX	29
+#define PIC_IRT_PCIE_INT_INDEX		28
+#define PIC_IRT_PCIE_FATAL_INDEX	29
+#define PIC_IRT_GPIO_B_INDEX		30
+#define PIC_IRT_USB_INDEX		31
+/* end XLS */
+#define PIC_NUM_IRTS			32
+
+
+#define PIC_CLOCK_TIMER			7
+
+/* PIC Registers */
+#define PIC_CTRL			0x00
+#define PIC_IPI				0x04
+#define PIC_INT_ACK			0x06
+
+#define WD_MAX_VAL_0			0x08
+#define WD_MAX_VAL_1			0x09
+#define WD_MASK_0			0x0a
+#define WD_MASK_1			0x0b
+#define WD_HEARBEAT_0			0x0c
+#define WD_HEARBEAT_1			0x0d
+
+#define PIC_IRT_0_BASE			0x40
+#define PIC_IRT_1_BASE			0x80
+#define PIC_TIMER_MAXVAL_0_BASE		0x100
+#define PIC_TIMER_MAXVAL_1_BASE		0x110
+#define PIC_TIMER_COUNT_0_BASE		0x120
+#define PIC_TIMER_COUNT_1_BASE		0x130
+
+#define PIC_IRT_0(picintr)      (PIC_IRT_0_BASE + (picintr))
+#define PIC_IRT_1(picintr)	(PIC_IRT_1_BASE + (picintr))
+
+#define PIC_TIMER_MAXVAL_0(i)	(PIC_TIMER_MAXVAL_0_BASE + (i))
+#define PIC_TIMER_MAXVAL_1(i)	(PIC_TIMER_MAXVAL_1_BASE + (i))
+#define PIC_TIMER_COUNT_0(i)	(PIC_TIMER_COUNT_0_BASE + (i))
+#define PIC_TIMER_COUNT_1(i)	(PIC_TIMER_COUNT_1_BASE + (i))
+
+/*
+ * Mapping between hardware interrupt numbers and IRQs on the CPU:
+ * we use a simple scheme to map PIC interrupts 0-31 to IRQs
+ * 8-39. This leaves IRQs 0-7 for CPU interrupts like
+ * count/compare and FMN.
+ */
+#define PIC_IRQ_BASE            8
+#define PIC_INTR_TO_IRQ(i)      (PIC_IRQ_BASE + (i))
+#define PIC_IRQ_TO_INTR(i)      ((i) - PIC_IRQ_BASE)
+
+#define PIC_IRT_FIRST_IRQ	PIC_IRQ_BASE
+#define PIC_WD_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_WD_INDEX)
+#define PIC_TIMER_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_0_INDEX)
+#define PIC_TIMER_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_1_INDEX)
+#define PIC_TIMER_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_2_INDEX)
+#define PIC_TIMER_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_3_INDEX)
+#define PIC_TIMER_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_4_INDEX)
+#define PIC_TIMER_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_5_INDEX)
+#define PIC_TIMER_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_6_INDEX)
+#define PIC_TIMER_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_7_INDEX)
+#define PIC_CLOCK_IRQ		(PIC_TIMER_7_IRQ)
+#define PIC_UART_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_0_INDEX)
+#define PIC_UART_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_1_INDEX)
+#define PIC_I2C_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_0_INDEX)
+#define PIC_I2C_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_1_INDEX)
+#define PIC_PCMCIA_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCMCIA_INDEX)
+#define PIC_GPIO_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_INDEX)
+#define PIC_HYPER_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_HYPER_INDEX)
+#define PIC_PCIX_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCIX_INDEX)
+/* XLS */
+#define PIC_CDE_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_CDE_INDEX)
+#define PIC_BRIDGE_TB_XLS_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLS_INDEX)
+/* end XLS */
+#define PIC_GMAC_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC0_INDEX)
+#define PIC_GMAC_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC1_INDEX)
+#define PIC_GMAC_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC2_INDEX)
+#define PIC_GMAC_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC3_INDEX)
+#define PIC_XGS_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS0_INDEX)
+#define PIC_XGS_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS1_INDEX)
+#define PIC_HYPER_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_HYPER_FATAL_INDEX)
+#define PIC_PCIX_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIX_FATAL_INDEX)
+#define PIC_BRIDGE_AERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_INDEX)
+#define PIC_BRIDGE_BERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_BERR_INDEX)
+#define PIC_BRIDGE_TB_XLR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLR_INDEX)
+#define PIC_BRIDGE_AERR_NMI_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_NMI_INDEX)
+/* XLS defines */
+#define PIC_GMAC_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC4_INDEX)
+#define PIC_GMAC_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC5_INDEX)
+#define PIC_GMAC_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC6_INDEX)
+#define PIC_GMAC_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC7_INDEX)
+#define PIC_BRIDGE_ERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_ERR_INDEX)
+#define PIC_PCIE_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK0_INDEX)
+#define PIC_PCIE_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK1_INDEX)
+#define PIC_PCIE_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK2_INDEX)
+#define PIC_PCIE_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK3_INDEX)
+#define PIC_PCIE_XLSB0_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK2_INDEX)
+#define PIC_PCIE_XLSB0_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK3_INDEX)
+#define PIC_SRIO_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK0_INDEX)
+#define PIC_SRIO_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK1_INDEX)
+#define PIC_SRIO_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK2_INDEX)
+#define PIC_SRIO_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK3_INDEX)
+#define PIC_PCIE_INT_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_INT_INDEX)
+#define PIC_PCIE_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_FATAL_INDEX)
+#define PIC_GPIO_B_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_B_INDEX)
+#define PIC_USB_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_USB_INDEX)
+#define PIC_IRT_LAST_IRQ	PIC_USB_IRQ
+/* end XLS */
+
+#ifndef __ASSEMBLY__
+static inline void pic_send_ipi(u32 ipi)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_IPI, ipi);
+}
+
+static inline u32 pic_read_control(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	return netlogic_read_reg(mmio, PIC_CTRL);
+}
+
+static inline void pic_write_control(u32 control)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_CTRL, control);
+}
+
+static inline void pic_update_control(u32 control)
+{
+	nlm_reg_t *pic = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	control |= netlogic_read_reg(pic, PIC_CTRL);	/* keep set bits */
+	netlogic_write_reg(pic, PIC_CTRL, control);
+}
+
+#define PIC_IRQ_IS_EDGE_TRIGGERED(irq)	(((irq) >= PIC_TIMER_0_IRQ) && \
+					((irq) <= PIC_TIMER_7_IRQ))
+#define PIC_IRQ_IS_IRT(irq)		(((irq) >= PIC_IRT_FIRST_IRQ) && \
+					((irq) <= PIC_IRT_LAST_IRQ))
+#endif
+
+#endif /* _ASM_NLM_XLR_PIC_H */
diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h
new file mode 100644
index 000000000000..3e6372692a04
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/xlr.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_H
+#define _ASM_NLM_XLR_H
+
+/* Platform UART functions */
+struct uart_port;
+unsigned int nlm_xlr_uart_in(struct uart_port *, int);
+void nlm_xlr_uart_out(struct uart_port *, int, int);
+
+/* SMP support functions */
+struct irq_desc;
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc);
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc);
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask);
+void nlm_smp_irq_init(void);
+void nlm_boot_smp_nmi(void);
+void prom_pre_boot_secondary_cpus(void);
+
+extern struct plat_smp_ops nlm_smp_ops;
+extern unsigned long nlm_common_ebase;
+
+/* XLS B silicon "Rook" */
+static inline unsigned int nlm_chip_is_xls_b(void)
+{
+	const uint32_t chip = read_c0_prid() & 0xf000;	/* PRId bits 15:12 */
+
+	return chip == 0x4000;
+}
+
+/*
+ *  XLR chip types
+ */
+ /* The XLS product line has chip versions 0x[48c]? */
+static inline unsigned int nlm_chip_is_xls(void)
+{
+	/* compare PRId bits 15:12 against the three XLS codes */
+	const uint32_t chip = read_c0_prid() & 0xf000;
+
+	return chip == 0x4000 || chip == 0x8000 || chip == 0xc000;
+}
+
+#endif /* _ASM_NLM_XLR_H */
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 9f1b8dba2c81..de39b1f343ea 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -141,7 +141,8 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit);
+extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
 extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
 
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d71160de4d10..97f8bf6639e7 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -149,6 +149,9 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
 
+/* work to do in syscall_trace_leave() */
+#define _TIF_WORK_SYSCALL_EXIT	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		(0x0000ffef &				\
 					~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT))
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index c7f1bfef1574..bc14447e69b5 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -84,12 +84,6 @@ static inline int init_mips_clocksource(void)
 #endif
 }
 
-static inline void clocksource_set_clock(struct clocksource *cs,
-					 unsigned int clock)
-{
-	clocksource_calc_mult_shift(cs, clock, 4);
-}
-
 static inline void clockevent_set_clock(struct clock_event_device *cd,
 					unsigned int clock)
 {
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 9ce9f64cb76f..2d8e447cb828 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free);
  */
 int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 {
-	int first, pages, npages;
+	int first, pages;
 
 	if (laddr > 0xffffff) {
 		if (vdma_debug)
@@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 		return -EINVAL;	/* invalid physical address */
 	}
 
-	npages = pages =
-	    (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
+	pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
 	first = laddr >> 12;
 	if (vdma_debug)
 		printk("vdma_remap: first=%x, pages=%x\n", first, pages);
diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c
index 5ebe75a68350..d7feb898692c 100644
--- a/arch/mips/jz4740/dma.c
+++ b/arch/mips/jz4740/dma.c
@@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue);
 
 static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma)
 {
-	uint32_t status;
-
-	status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
+	(void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
 
 	jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0,
 		JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE);
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 6a9e14dab91e..d97cfbf882f5 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -1,5 +1,6 @@
 /*
  *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org>
  *  JZ4740 setup code
  *
  *  This program is free software; you can redistribute it and/or modify it
@@ -14,13 +15,44 @@
  */
 
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 
+#include <asm/bootinfo.h>
+
+#include <asm/mach-jz4740/base.h>
+
 #include "reset.h"
 
+
+#define JZ4740_EMC_SDRAM_CTRL 0x80
+
+/* Register the RAM size implied by the SDRAM geometry in the EMC control register. */
+static void __init jz4740_detect_mem(void)
+{
+	void __iomem *jz_emc_base;
+	u32 ctrl, bus, bank, rows, cols;
+	phys_t size;
+
+	jz_emc_base = ioremap(JZ4740_EMC_BASE_ADDR, 0x100);
+	ctrl = readl(jz_emc_base + JZ4740_EMC_SDRAM_CTRL);
+	bus = 2 - ((ctrl >> 31) & 1);	/* the four values are summed into the size exponent */
+	bank = 1 + ((ctrl >> 19) & 1);
+	cols = 8 + ((ctrl >> 26) & 7);
+	rows = 11 + ((ctrl >> 20) & 3);
+	printk(KERN_DEBUG
+		"SDRAM preconfigured: bus:%u bank:%u rows:%u cols:%u\n",
+		bus, bank, rows, cols);
+	iounmap(jz_emc_base);
+	/* shift a phys_t, not an int: "1 << n" overflows once n reaches 31 */
+	size = (phys_t)1 << (bus + bank + cols + rows);
+	add_memory_region(0, size, BOOT_MEM_RAM);
+}
+
 void __init plat_mem_setup(void)
 {
 	jz4740_reset_init();
+	jz4740_detect_mem();
 }
 
 const char *get_system_type(void)
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index fe01678d94fd..f83c2dd07a27 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt,
 
 static struct clock_event_device jz4740_clockevent = {
 	.name = "jz4740-timer",
-	.features = CLOCK_EVT_FEAT_PERIODIC,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_next_event = jz4740_clockevent_set_next,
 	.set_mode = jz4740_clockevent_set_mode,
 	.rating = 200,
@@ -121,8 +121,7 @@ void __init plat_time_init(void)
 
 	clockevents_register_device(&jz4740_clockevent);
 
-	clocksource_set_clock(&jz4740_clocksource, clk_rate);
-	ret = clocksource_register(&jz4740_clocksource);
+	ret = clocksource_register_hz(&jz4740_clocksource, clk_rate);
 
 	if (ret)
 		printk(KERN_ERR "Failed to register clocksource: %d\n", ret);
diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c
index b2c015129055..654d5c3900b6 100644
--- a/arch/mips/jz4740/timer.c
+++ b/arch/mips/jz4740/timer.c
@@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog);
 
 void jz4740_timer_disable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog);
 
 void __init jz4740_timer_init(void)
 {
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index cedee2bcbd18..83bba332bbfc 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r2300_fpu.o r2300_switch.o
 obj-$(CONFIG_CPU_TX49XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_VR41XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= octeon_switch.o
+obj-$(CONFIG_CPU_XLR)		+= r4k_fpu.o r4k_switch.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP_UP)		+= smp-up.o
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index 0b7377361e22..f0ab92a1b057 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -51,8 +51,7 @@ void __init txx9_clocksource_init(unsigned long baseaddr,
 {
 	struct txx9_tmr_reg __iomem *tmrptr;
 
-	clocksource_set_clock(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
-	clocksource_register(&txx9_clocksource.cs);
+	clocksource_register_hz(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
 
 	tmrptr = ioremap(baseaddr, sizeof(struct txx9_tmr_reg));
 	__raw_writel(TCR_BASE, &tmrptr->tcr);
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f65d4c8c65a6..bb133d10b145 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -291,6 +291,12 @@ static inline int cpu_has_confreg(void)
 #endif
 }
 
+static inline void set_elf_platform(int cpu, const char *plat)
+{
+	if (cpu == 0)	/* only CPU 0 records the platform string */
+		__elf_platform = plat;
+}
+
 /*
  * Get the FPU Implementation/Revision.
  */
@@ -614,6 +620,16 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_LOONGSON2:
 		c->cputype = CPU_LOONGSON2;
 		__cpu_name[cpu] = "ICT Loongson-2";
+
+		switch (c->processor_id & PRID_REV_MASK) {
+		case PRID_REV_LOONGSON2E:
+			set_elf_platform(cpu, "loongson2e");
+			break;
+		case PRID_REV_LOONGSON2F:
+			set_elf_platform(cpu, "loongson2f");
+			break;
+		}
+
 		c->isa_level = MIPS_CPU_ISA_III;
 		c->options = R4K_OPTS |
 			     MIPS_CPU_FPU | MIPS_CPU_LLSC |
@@ -911,12 +927,14 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_BMIPS32_REV8:
 		c->cputype = CPU_BMIPS32;
 		__cpu_name[cpu] = "Broadcom BMIPS32";
+		set_elf_platform(cpu, "bmips32");
 		break;
 	case PRID_IMP_BMIPS3300:
 	case PRID_IMP_BMIPS3300_ALT:
 	case PRID_IMP_BMIPS3300_BUG:
 		c->cputype = CPU_BMIPS3300;
 		__cpu_name[cpu] = "Broadcom BMIPS3300";
+		set_elf_platform(cpu, "bmips3300");
 		break;
 	case PRID_IMP_BMIPS43XX: {
 		int rev = c->processor_id & 0xff;
@@ -925,15 +943,18 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 				rev <= PRID_REV_BMIPS4380_HI) {
 			c->cputype = CPU_BMIPS4380;
 			__cpu_name[cpu] = "Broadcom BMIPS4380";
+			set_elf_platform(cpu, "bmips4380");
 		} else {
 			c->cputype = CPU_BMIPS4350;
 			__cpu_name[cpu] = "Broadcom BMIPS4350";
+			set_elf_platform(cpu, "bmips4350");
 		}
 		break;
 	}
 	case PRID_IMP_BMIPS5000:
 		c->cputype = CPU_BMIPS5000;
 		__cpu_name[cpu] = "Broadcom BMIPS5000";
+		set_elf_platform(cpu, "bmips5000");
 		c->options |= MIPS_CPU_ULRI;
 		break;
 	}
@@ -956,14 +977,12 @@ static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_CAVIUM_OCTEON_PLUS;
 		__cpu_name[cpu] = "Cavium Octeon+";
 platform:
-		if (cpu == 0)
-			__elf_platform = "octeon";
+		set_elf_platform(cpu, "octeon");
 		break;
 	case PRID_IMP_CAVIUM_CN63XX:
 		c->cputype = CPU_CAVIUM_OCTEON2;
 		__cpu_name[cpu] = "Cavium Octeon II";
-		if (cpu == 0)
-			__elf_platform = "octeon2";
+		set_elf_platform(cpu, "octeon2");
 		break;
 	default:
 		printk(KERN_INFO "Unknown Octeon chip!\n");
@@ -988,6 +1007,59 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 	}
 }
 
+static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
+{
+	/* Describe a Netlogic core in *c and give it a __cpu_name[] entry. */
+	decode_configs(c);
+
+	c->options = (MIPS_CPU_TLB       |
+			MIPS_CPU_4KEX    |
+			MIPS_CPU_COUNTER |
+			MIPS_CPU_DIVEC   |
+			MIPS_CPU_WATCH   |
+			MIPS_CPU_EJTAG   |
+			MIPS_CPU_LLSC);
+	c->cputype = CPU_XLR;	/* same type for XLR, XLS and unknown ids */
+
+	switch (c->processor_id & 0xff00) {
+	case PRID_IMP_NETLOGIC_XLR732:
+	case PRID_IMP_NETLOGIC_XLR716:
+	case PRID_IMP_NETLOGIC_XLR532:
+	case PRID_IMP_NETLOGIC_XLR308:
+	case PRID_IMP_NETLOGIC_XLR532C:
+	case PRID_IMP_NETLOGIC_XLR516C:
+	case PRID_IMP_NETLOGIC_XLR508C:
+	case PRID_IMP_NETLOGIC_XLR308C:
+		__cpu_name[cpu] = "Netlogic XLR";
+		break;
+
+	case PRID_IMP_NETLOGIC_XLS608:
+	case PRID_IMP_NETLOGIC_XLS408:
+	case PRID_IMP_NETLOGIC_XLS404:
+	case PRID_IMP_NETLOGIC_XLS208:
+	case PRID_IMP_NETLOGIC_XLS204:
+	case PRID_IMP_NETLOGIC_XLS108:
+	case PRID_IMP_NETLOGIC_XLS104:
+	case PRID_IMP_NETLOGIC_XLS616B:
+	case PRID_IMP_NETLOGIC_XLS608B:
+	case PRID_IMP_NETLOGIC_XLS416B:
+	case PRID_IMP_NETLOGIC_XLS412B:
+	case PRID_IMP_NETLOGIC_XLS408B:
+	case PRID_IMP_NETLOGIC_XLS404B:
+		__cpu_name[cpu] = "Netlogic XLS";
+		break;
+
+	default:
+		printk(KERN_INFO "Unknown Netlogic chip id [%02x]!\n",
+		       c->processor_id);
+		__cpu_name[cpu] = "Netlogic XLR";	/* cpu_probe() BUG()s on a NULL name */
+		break;
+	}
+
+	c->isa_level = MIPS_CPU_ISA_M64R1;
+	c->tlbsize = ((read_c0_config1() >> 25) & 0x3f) + 1;
+}
+
 #ifdef CONFIG_64BIT
 /* For use by uaccess.h */
 u64 __ua_limit;
@@ -1035,6 +1107,9 @@ __cpuinit void cpu_probe(void)
 	case PRID_COMP_INGENIC:
 		cpu_probe_ingenic(c, cpu);
 		break;
+	case PRID_COMP_NETLOGIC:
+		cpu_probe_netlogic(c, cpu);
+		break;
 	}
 
 	BUG_ON(!__cpu_name[cpu]);
diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c
index 51489f8a825e..f96f99c794a3 100644
--- a/arch/mips/kernel/csrc-bcm1480.c
+++ b/arch/mips/kernel/csrc-bcm1480.c
@@ -49,6 +49,5 @@ void __init sb1480_clocksource_init(void)
 
 	plldiv = G_BCM1480_SYS_PLL_DIV(__raw_readq(IOADDR(A_SCD_SYSTEM_CFG)));
 	zbbus = ((plldiv >> 1) * 50000000) + ((plldiv & 1) * 25000000);
-	clocksource_set_clock(cs, zbbus);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, zbbus);
 }
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
index 23da108506b0..46bd7fa98d6c 100644
--- a/arch/mips/kernel/csrc-ioasic.c
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -59,7 +59,5 @@ void __init dec_ioasic_clocksource_init(void)
 	printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
 
 	clocksource_dec.rating = 200 + freq / 10000000;
-	clocksource_set_clock(&clocksource_dec, freq);
-
-	clocksource_register(&clocksource_dec);
+	clocksource_register_hz(&clocksource_dec, freq);
 }
diff --git a/arch/mips/kernel/csrc-powertv.c b/arch/mips/kernel/csrc-powertv.c
index a27c16c8690e..2e7c5232da8d 100644
--- a/arch/mips/kernel/csrc-powertv.c
+++ b/arch/mips/kernel/csrc-powertv.c
@@ -78,9 +78,7 @@ static void __init powertv_c0_hpt_clocksource_init(void)
 
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 }
 
 /**
@@ -130,43 +128,16 @@ static struct clocksource clocksource_tim_c = {
 /**
  * powertv_tim_c_clocksource_init - set up a clock source for the TIM_C clock
  *
- * The hard part here is coming up with a constant k and shift s such that
- * the 48-bit TIM_C value multiplied by k doesn't overflow and that value,
- * when shifted right by s, yields the corresponding number of nanoseconds.
  * We know that TIM_C counts at 27 MHz/8, so each cycle corresponds to
- * 1 / (27,000,000/8) seconds. Multiply that by a billion and you get the
- * number of nanoseconds. Since the TIM_C value has 48 bits and the math is
- * done in 64 bits, avoiding an overflow means that k must be less than
- * 64 - 48 = 16 bits.
+ * 1 / (27,000,000/8) seconds.
  */
 static void __init powertv_tim_c_clocksource_init(void)
 {
-	int			prescale;
-	unsigned long		dividend;
-	unsigned long		k;
-	int			s;
-	const int		max_k_bits = (64 - 48) - 1;
-	const unsigned long	billion = 1000000000;
 	const unsigned long	counts_per_second = 27000000 / 8;
 
-	prescale = BITS_PER_LONG - ilog2(billion) - 1;
-	dividend = billion << prescale;
-	k = dividend / counts_per_second;
-	s = ilog2(k) - max_k_bits;
-
-	if (s < 0)
-		s = prescale;
-
-	else {
-		k >>= s;
-		s += prescale;
-	}
-
-	clocksource_tim_c.mult = k;
-	clocksource_tim_c.shift = s;
 	clocksource_tim_c.rating = 200;
 
-	clocksource_register(&clocksource_tim_c);
+	clocksource_register_hz(&clocksource_tim_c, counts_per_second);
 	tim_c = (struct tim_c *) asic_reg_addr(tim_ch);
 }
 
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index e95a3cd48eea..decd1fa38d55 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -30,9 +30,7 @@ int __init init_r4k_clocksource(void)
 	/* Calculate a somewhat reasonable rating value */
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 
 	return 0;
 }
diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c
index d14d3d1907fa..e9606d907685 100644
--- a/arch/mips/kernel/csrc-sb1250.c
+++ b/arch/mips/kernel/csrc-sb1250.c
@@ -65,6 +65,5 @@ void __init sb1250_clocksource_init(void)
 		     IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM,
 						 R_SCD_TIMER_CFG)));
 
-	clocksource_set_clock(cs, V_SCD_TIMER_FREQ);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, V_SCD_TIMER_FREQ);
 }
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ffa331029e08..37acfa036d44 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -167,14 +167,13 @@ work_notifysig:				# deal with pending signals and
 FEXPORT(syscall_exit_work_partial)
 	SAVE_STATIC
 syscall_exit_work:
-	li	t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t0, _TIF_WORK_SYSCALL_EXIT
 	and	t0, a2			# a2 is preloaded with TI_FLAGS
 	beqz	t0, work_pending	# trace bit set?
-	local_irq_enable		# could let do_syscall_trace()
+	local_irq_enable		# could let syscall_trace_leave()
 					# call schedule() instead
 	move	a0, sp
-	li	a1, 1
-	jal	do_syscall_trace
+	jal	syscall_trace_leave
 	b	resume_userspace
 
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT)
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 94ca2b018af7..feb8021a305f 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -23,6 +23,7 @@
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
 #define ADDR_MASK 0x03ffffff	/*  op_code|addr : 31...26|25 ....0 */
+#define JUMP_RANGE_MASK ((1UL << 28) - 1)	/* keeps the low 28 bits of a j/jal target address */
 
 #define INSN_NOP 0x00000000	/* nop */
 #define INSN_JAL(addr)	\
@@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void)
 
 	/* jal (ftrace_caller + 8), jump over the first two instruction */
 	buf = (u32 *)&insn_jal_ftrace_caller;
-	uasm_i_jal(&buf, (FTRACE_ADDR + 8));
+	uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* j ftrace_graph_caller */
 	buf = (u32 *)&insn_j_ftrace_graph_caller;
-	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
+	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK);
 #endif
 }
 
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index 2392a7a296d4..391221b6a6aa 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -125,87 +125,11 @@ void __init setup_pit_timer(void)
 	setup_irq(0, &irq0);
 }
 
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	int count;
-	u32 jifs;
-	static int old_count;
-	static u32 old_jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_p(PIT_CH0);	/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_p(0x34, PIT_MODE);
-		outb_p(LATCH & 0xff, PIT_CH0);
-		outb(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs) {
-		count = old_count;
-	}
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource clocksource_pit = {
-	.name	= "pit",
-	.rating = 110,
-	.read	= pit_read,
-	.mask	= CLOCKSOURCE_MASK(32),
-	.mult	= 0,
-	.shift	= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	if (num_possible_cpus() > 1) /* PIT does not scale! */
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
-	return clocksource_register(&clocksource_pit);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d21c388c0116..4e6ea1ffad46 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -533,15 +533,10 @@ static inline int audit_arch(void)
  * Notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
 	/* do the secure computing check first */
-	if (!entryexit)
-		secure_computing(regs->regs[2]);
-
-	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
-		                   regs->regs[2]);
+	secure_computing(regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
 		goto out;
@@ -565,8 +560,40 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 	}
 
 out:
-	if (unlikely(current->audit_context) && !entryexit)
+	if (unlikely(current->audit_context))
 		audit_syscall_entry(audit_arch(), regs->regs[2],
 				    regs->regs[4], regs->regs[5],
 				    regs->regs[6], regs->regs[7]);
 }
+
+/*
+ * Notification of system call exit
+ * - triggered by current->work.syscall_trace
+ */
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+	int stop_sig = SIGTRAP;
+
+	/* hand the result to audit when the task has an audit context */
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
+		                   -regs->regs[2]);
+
+	/* nothing more unless ptraced with TIF_SYSCALL_TRACE set */
+	if (!(current->ptrace & PT_PTRACED) ||
+	    !test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	/* With PT_TRACESYSGOOD, bit 0x80 lets the tracing parent tell a
+	   syscall stop from SIGTRAP delivery */
+	if (current->ptrace & PT_TRACESYSGOOD)
+		stop_sig |= 0x80;
+	ptrace_notify(stop_sig);
+
+	/* If exit_code is non-zero after the stop, send it to ourselves as
+	   a signal and clear it; not the same as continuing with a signal. */
+	if (!current->exit_code)
+		return;
+	send_sig(current->exit_code, current, 1);
+	current->exit_code = 0;
+}
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b38d4c..7a8e1dd7f6f2 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -88,8 +88,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -565,7 +564,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_ioprio_get		2	/* 4315 */
 	sys	sys_utimensat		4
 	sys	sys_signalfd		3
-	sys	sys_ni_syscall		0
+	sys	sys_ni_syscall		0	/* was timerfd */
 	sys	sys_eventfd		1
 	sys	sys_fallocate		6	/* 4320 */
 	sys	sys_timerfd_create	2
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcbc41dc..2d31c83224f9 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -91,8 +91,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -404,7 +403,7 @@ sys_call_table:
 	PTR	sys_ioprio_get
 	PTR	sys_utimensat			/* 5275 */
 	PTR	sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create		/* 5280 */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624995b8..38a0503b9a4a 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -89,8 +89,7 @@ n32_syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -403,7 +402,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ioprio_get
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd		/* 6280 */
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8c49a0..91ea5e4041dd 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -123,8 +123,7 @@ trace_a_syscall:
 
 	move	s0, t2			# Save syscall pointer
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -522,7 +521,7 @@ sys_call_table:
 	PTR	sys_ioprio_get			/* 4315 */
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys32_fallocate			/* 4320 */
 	PTR	sys_timerfd_create
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 5a88cc4ccd5a..cedac4633741 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
 
 static void ipi_resched_interrupt(void)
 {
-	/* Return from interrupt should be enough to cause scheduler check */
+	scheduler_ipi();
 }
 
 static void ipi_call_interrupt(void)
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 58beabf50b3c..d02765708ddb 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -10,12 +10,9 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/linkage.h>
-#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
-#include <linux/mman.h>
 #include <linux/ptrace.h>
-#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
@@ -25,11 +22,9 @@
 #include <linux/msg.h>
 #include <linux/shm.h>
 #include <linux/compiler.h>
-#include <linux/module.h>
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/random.h>
 #include <linux/elf.h>
 
 #include <asm/asm.h>
@@ -66,121 +61,6 @@ out:
 	return res;
 }
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-
-EXPORT_SYMBOL(shm_align_mask);
-
-#define COLOUR_ALIGN(addr,pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
-
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-	unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-	struct vm_area_struct * vmm;
-	int do_color_align;
-	unsigned long task_size;
-
-#ifdef CONFIG_32BIT
-	task_size = TASK_SIZE;
-#else /* Must be CONFIG_64BIT*/
-	task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE;
-#endif
-
-	if (len > task_size)
-		return -ENOMEM;
-
-	if (flags & MAP_FIXED) {
-		/* Even MAP_FIXED mappings must reside within task_size.  */
-		if (task_size - len < addr)
-			return -EINVAL;
-
-		/*
-		 * We do not accept a shared mapping if it would violate
-		 * cache aliasing constraints.
-		 */
-		if ((flags & MAP_SHARED) &&
-		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
-			return -EINVAL;
-		return addr;
-	}
-
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = 1;
-	if (addr) {
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-		else
-			addr = PAGE_ALIGN(addr);
-		vmm = find_vma(current->mm, addr);
-		if (task_size - len >= addr &&
-		    (!vmm || addr + len <= vmm->vm_start))
-			return addr;
-	}
-	addr = current->mm->mmap_base;
-	if (do_color_align)
-		addr = COLOUR_ALIGN(addr, pgoff);
-	else
-		addr = PAGE_ALIGN(addr);
-
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (task_size - len < addr)
-			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
-			return addr;
-		addr = vmm->vm_end;
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-	}
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-	unsigned long random_factor = 0UL;
-
-	if (current->flags & PF_RANDOMIZE) {
-		random_factor = get_random_int();
-		random_factor = random_factor << PAGE_SHIFT;
-		if (TASK_IS_32BIT_ADDR)
-			random_factor &= 0xfffffful;
-		else
-			random_factor &= 0xffffffful;
-	}
-
-	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
-}
-
-static inline unsigned long brk_rnd(void)
-{
-	unsigned long rnd = get_random_int();
-
-	rnd = rnd << PAGE_SHIFT;
-	/* 8MB for 32bit, 256MB for 64bit */
-	if (TASK_IS_32BIT_ADDR)
-		rnd = rnd & 0x7ffffful;
-	else
-		rnd = rnd & 0xffffffful;
-
-	return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long base = mm->brk;
-	unsigned long ret;
-
-	ret = PAGE_ALIGN(base + brk_rnd());
-
-	if (ret < mm->brk)
-		return mm->brk;
-
-	return ret;
-}
-
 SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags, unsigned long,
 	fd, off_t, offset)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d88..e9b3af27d844 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	unsigned long dvpret = dvpe();
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+		sig = 0;
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	mips_mt_regdump(dvpret);
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
-		sig = 0;
-
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	add_taint(TAINT_DIE);
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 832afbb87588..01af3876cf90 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -65,15 +65,18 @@ SECTIONS
 	NOTES :text :note
 	.dummy : { *(.dummy) } :text
 
+	_sdata = .;			/* Start of data section */
 	RODATA
 
 	/* writeable */
+	_sdata = .;				/* Start of data section; NOTE(review): _sdata is also assigned before RODATA above - confirm which one is intended */
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */
 
 		INIT_TASK_DATA(PAGE_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
+		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		DATA_DATA
 		CONSTRUCTORS
 	}
diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
new file mode 100644
index 000000000000..3fccf2104513
--- /dev/null
+++ b/arch/mips/lantiq/Kconfig
@@ -0,0 +1,23 @@
+if LANTIQ
+
+config SOC_TYPE_XWAY
+	bool
+	default n
+
+choice
+	prompt "SoC Type"
+	default SOC_XWAY
+
+config SOC_AMAZON_SE
+	bool "Amazon SE"
+	select SOC_TYPE_XWAY
+
+config SOC_XWAY
+	bool "XWAY"
+	select SOC_TYPE_XWAY
+	select HW_HAS_PCI
+endchoice
+
+source "arch/mips/lantiq/xway/Kconfig"
+
+endif
diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
new file mode 100644
index 000000000000..e5dae0e24b00
--- /dev/null
+++ b/arch/mips/lantiq/Makefile
@@ -0,0 +1,11 @@
+# Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+
+obj-y := irq.o setup.o clk.o prom.o devices.o
+
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+
+obj-$(CONFIG_SOC_TYPE_XWAY) += xway/
diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform
new file mode 100644
index 000000000000..f3dff05722de
--- /dev/null
+++ b/arch/mips/lantiq/Platform
@@ -0,0 +1,8 @@
+#
+# Lantiq
+#
+
+platform-$(CONFIG_LANTIQ)	+= lantiq/
+cflags-$(CONFIG_LANTIQ)		+= -I$(srctree)/arch/mips/include/asm/mach-lantiq
+load-$(CONFIG_LANTIQ)		= 0xffffffff80002000
+cflags-$(CONFIG_SOC_TYPE_XWAY)	+= -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
new file mode 100644
index 000000000000..94560899d13e
--- /dev/null
+++ b/arch/mips/lantiq/clk.c
@@ -0,0 +1,140 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/list.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+#include "clk.h"
+
+struct clk {
+	const char *name;
+	unsigned long rate;
+	unsigned long (*get_rate) (void);
+};
+
+static struct clk *cpu_clk;
+static int cpu_clk_cnt;
+
+/* lantiq socs have 3 static clocks */
+static struct clk cpu_clk_generic[] = {
+	{
+		.name = "cpu",
+		.get_rate = ltq_get_cpu_hz,
+	}, {
+		.name = "fpi",
+		.get_rate = ltq_get_fpi_hz,
+	}, {
+		.name = "io",
+		.get_rate = ltq_get_io_region_clock,
+	},
+};
+
+static struct resource ltq_cgu_resource = {
+	.name	= "cgu",
+	.start	= LTQ_CGU_BASE_ADDR,
+	.end	= LTQ_CGU_BASE_ADDR + LTQ_CGU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped clock register range */
+void __iomem *ltq_cgu_membase;
+
+void clk_init(void)
+{
+	cpu_clk = cpu_clk_generic;
+	cpu_clk_cnt = ARRAY_SIZE(cpu_clk_generic);
+}
+
+static inline int clk_good(struct clk *clk)
+{
+	return clk && !IS_ERR(clk);
+}
+
+/* a cached non-zero rate wins, otherwise the get_rate hook is consulted */
+unsigned long clk_get_rate(struct clk *clk)
+{
+	unsigned long hz = 0;
+
+	if (unlikely(!clk_good(clk)))
+		return 0;
+	if (clk->rate)
+		hz = clk->rate;
+	else if (clk->get_rate)
+		hz = clk->get_rate();
+	return hz;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	int i;
+
+	for (i = 0; i < cpu_clk_cnt; i++)
+		if (!strcmp(id, cpu_clk[i].name))
+			return &cpu_clk[i];
+	BUG();
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+	/* not used */
+}
+EXPORT_SYMBOL(clk_put);
+
+static inline u32 ltq_get_counter_resolution(void)
+{
+	u32 res;
+
+	__asm__ __volatile__(
+		".set   push\n"
+		".set   mips32r2\n"
+		"rdhwr  %0, $3\n"
+		".set pop\n"
+		: "=&r" (res)
+		: /* no input */
+		: "memory");
+
+	return res;
+}
+
+void __init plat_time_init(void)
+{
+	struct clk *clk;
+
+	if (insert_resource(&iomem_resource, &ltq_cgu_resource) < 0)
+		panic("Failed to insert cgu memory\n");
+
+	/* request_mem_region() hands back NULL on failure, not an errno */
+	if (!request_mem_region(ltq_cgu_resource.start,
+			resource_size(&ltq_cgu_resource), "cgu"))
+		panic("Failed to request cgu memory\n");
+
+	ltq_cgu_membase = ioremap_nocache(ltq_cgu_resource.start,
+				resource_size(&ltq_cgu_resource));
+	if (!ltq_cgu_membase)
+		panic("Failed to remap cgu memory\n");
+	/* hpt frequency is the cpu clock divided by the counter resolution */
+	clk = clk_get(0, "cpu");
+	mips_hpt_frequency = clk_get_rate(clk) / ltq_get_counter_resolution();
+	write_c0_compare(read_c0_count());
+	clk_put(clk);
+}
diff --git a/arch/mips/lantiq/clk.h b/arch/mips/lantiq/clk.h
new file mode 100644
index 000000000000..3328925f2c3f
--- /dev/null
+++ b/arch/mips/lantiq/clk.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_CLK_H__
+#define _LTQ_CLK_H__
+
+extern void clk_init(void);
+
+extern unsigned long ltq_get_cpu_hz(void);
+extern unsigned long ltq_get_fpi_hz(void);
+extern unsigned long ltq_get_io_region_clock(void);
+
+#endif
diff --git a/arch/mips/lantiq/devices.c b/arch/mips/lantiq/devices.c
new file mode 100644
index 000000000000..7b82c34cb169
--- /dev/null
+++ b/arch/mips/lantiq/devices.c
@@ -0,0 +1,122 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+
+#include "devices.h"
+
+/* nor flash */
+static struct resource ltq_nor_resource = {
+	.name	= "nor",
+	.start	= LTQ_FLASH_START,
+	.end	= LTQ_FLASH_START + LTQ_FLASH_MAX - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_nor = {
+	.name		= "ltq_nor",
+	.resource	= &ltq_nor_resource,
+	.num_resources	= 1,
+};
+
+void __init ltq_register_nor(struct physmap_flash_data *data)
+{
+	ltq_nor.dev.platform_data = data;
+	platform_device_register(&ltq_nor);
+}
+
+/* watchdog */
+static struct resource ltq_wdt_resource = {
+	.name	= "watchdog",
+	.start  = LTQ_WDT_BASE_ADDR,
+	.end    = LTQ_WDT_BASE_ADDR + LTQ_WDT_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_wdt(void)
+{
+	platform_device_register_simple("ltq_wdt", 0, &ltq_wdt_resource, 1);
+}
+
+/* asc ports */
+static struct resource ltq_asc0_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC0_BASE_ADDR,
+		.end    = LTQ_ASC0_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(0)),
+	IRQ_RES(rx, LTQ_ASC_RIR(0)),
+	IRQ_RES(err, LTQ_ASC_EIR(0)),
+};
+
+static struct resource ltq_asc1_resources[] = {
+	{
+		.name	= "asc1",
+		.start  = LTQ_ASC1_BASE_ADDR,
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(1)),
+	IRQ_RES(rx, LTQ_ASC_RIR(1)),
+	IRQ_RES(err, LTQ_ASC_EIR(1)),
+};
+
+/*
+ * Register asc port 0 or 1 as an "ltq_asc" platform device whose id is the
+ * port number, backed by the matching ltq_ascN_resources table. Any other
+ * port number is ignored.
+ */
+void __init ltq_register_asc(int port)
+{
+	if (port == 0) {
+		platform_device_register_simple("ltq_asc", 0,
+			ltq_asc0_resources, ARRAY_SIZE(ltq_asc0_resources));
+	} else if (port == 1) {
+		platform_device_register_simple("ltq_asc", 1,
+			ltq_asc1_resources, ARRAY_SIZE(ltq_asc1_resources));
+	}
+}
+
+#ifdef CONFIG_PCI
+/* pci */
+static struct platform_device ltq_pci = {
+	.name		= "ltq_pci",
+	.num_resources	= 0,
+};
+
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	ltq_pci.dev.platform_data = data;
+	platform_device_register(&ltq_pci);
+}
+#else
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	pr_err("kernel is compiled without PCI support\n");
+}
+#endif
diff --git a/arch/mips/lantiq/devices.h b/arch/mips/lantiq/devices.h
new file mode 100644
index 000000000000..2947bb19a528
--- /dev/null
+++ b/arch/mips/lantiq/devices.h
@@ -0,0 +1,23 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_H__
+#define _LTQ_DEVICES_H__
+
+#include <lantiq_platform.h>
+#include <linux/mtd/physmap.h>
+
+#define IRQ_RES(resname, irq) \
+	{.name = #resname, .start = (irq), .flags = IORESOURCE_IRQ}
+
+extern void ltq_register_nor(struct physmap_flash_data *data);
+extern void ltq_register_wdt(void);
+extern void ltq_register_asc(int port);
+extern void ltq_register_pci(struct ltq_pci_data *data);
+
+#endif
diff --git a/arch/mips/lantiq/early_printk.c b/arch/mips/lantiq/early_printk.c
new file mode 100644
index 000000000000..972e05f87631
--- /dev/null
+++ b/arch/mips/lantiq/early_printk.c
@@ -0,0 +1,33 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+
+#include <lantiq.h>
+#include <lantiq_soc.h>
+
+/* no ioremap possible at this early stage, lets use KSEG1 instead  */
+#define LTQ_ASC_BASE	KSEG1ADDR(LTQ_ASC1_BASE_ADDR)
+#define ASC_BUF		1024
+#define LTQ_ASC_FSTAT	((u32 *)(LTQ_ASC_BASE + 0x0048))
+#define LTQ_ASC_TBUF	((u32 *)(LTQ_ASC_BASE + 0x0020))
+#define TXMASK		0x3F00
+#define TXOFFSET	8
+
+void prom_putchar(char c)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	do { } while ((ltq_r32(LTQ_ASC_FSTAT) & TXMASK) >> TXOFFSET);
+	if (c == '\n')
+		ltq_w32('\r', LTQ_ASC_TBUF);
+	ltq_w32(c, LTQ_ASC_TBUF);
+	local_irq_restore(flags);
+}
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
new file mode 100644
index 000000000000..fc89795cafdb
--- /dev/null
+++ b/arch/mips/lantiq/irq.c
@@ -0,0 +1,326 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq_cpu.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+/* register definitions */
+#define LTQ_ICU_IM0_ISR		0x0000
+#define LTQ_ICU_IM0_IER		0x0008
+#define LTQ_ICU_IM0_IOSR	0x0010
+#define LTQ_ICU_IM0_IRSR	0x0018
+#define LTQ_ICU_IM0_IMR		0x0020
+#define LTQ_ICU_IM1_ISR		0x0028
+#define LTQ_ICU_OFFSET		(LTQ_ICU_IM1_ISR - LTQ_ICU_IM0_ISR)
+
+#define LTQ_EIU_EXIN_C		0x0000
+#define LTQ_EIU_EXIN_INIC	0x0004
+#define LTQ_EIU_EXIN_INEN	0x000C
+
+/* irq numbers used by the external interrupt unit (EIU) */
+#define LTQ_EIU_IR0		(INT_NUM_IM4_IRL0 + 30)
+#define LTQ_EIU_IR1		(INT_NUM_IM3_IRL0 + 31)
+#define LTQ_EIU_IR2		(INT_NUM_IM1_IRL0 + 26)
+#define LTQ_EIU_IR3		INT_NUM_IM1_IRL0
+#define LTQ_EIU_IR4		(INT_NUM_IM1_IRL0 + 1)
+#define LTQ_EIU_IR5		(INT_NUM_IM1_IRL0 + 2)
+#define LTQ_EIU_IR6		(INT_NUM_IM2_IRL0 + 30)
+
+#define MAX_EIU			6
+
+/* irqs generated by device attached to the EBU need to be acked in
+ * a special manner
+ */
+#define LTQ_ICU_EBU_IRQ		22
+
+#define ltq_icu_w32(x, y)	ltq_w32((x), ltq_icu_membase + (y))
+#define ltq_icu_r32(x)		ltq_r32(ltq_icu_membase + (x))
+
+#define ltq_eiu_w32(x, y)	ltq_w32((x), ltq_eiu_membase + (y))
+#define ltq_eiu_r32(x)		ltq_r32(ltq_eiu_membase + (x))
+
+static unsigned short ltq_eiu_irq[MAX_EIU] = {
+	LTQ_EIU_IR0,
+	LTQ_EIU_IR1,
+	LTQ_EIU_IR2,
+	LTQ_EIU_IR3,
+	LTQ_EIU_IR4,
+	LTQ_EIU_IR5,
+};
+
+static struct resource ltq_icu_resource = {
+	.name	= "icu",
+	.start	= LTQ_ICU_BASE_ADDR,
+	.end	= LTQ_ICU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct resource ltq_eiu_resource = {
+	.name	= "eiu",
+	.start	= LTQ_EIU_BASE_ADDR,
+	.end	= LTQ_EIU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_icu_membase;
+static void __iomem *ltq_eiu_membase;
+
+/* clear the irq's bit in the enable register of the module it belongs to */
+void ltq_disable_irq(struct irq_data *d)
+{
+	int nr = d->irq - INT_NUM_IRQ0;
+	u32 ier = LTQ_ICU_IM0_IER + LTQ_ICU_OFFSET * (nr / INT_NUM_IM_OFFSET);
+	u32 val = ltq_icu_r32(ier);
+
+	ltq_icu_w32(val & ~(1 << (nr % INT_NUM_IM_OFFSET)), ier);
+}
+
+/* disable the irq in its module's IER, then write its bit to the ISR */
+void ltq_mask_and_ack_irq(struct irq_data *d)
+{
+	int nr = d->irq - INT_NUM_IRQ0;
+	u32 im_base = LTQ_ICU_OFFSET * (nr / INT_NUM_IM_OFFSET);
+	u32 ier = LTQ_ICU_IM0_IER + im_base;
+	u32 isr = LTQ_ICU_IM0_ISR + im_base;
+	u32 mask = 1 << (nr % INT_NUM_IM_OFFSET);
+
+	ltq_icu_w32(ltq_icu_r32(ier) & ~mask, ier);
+	ltq_icu_w32(mask, isr);
+}
+
+/* acknowledge by writing the irq's bit to the ISR of its module */
+static void ltq_ack_irq(struct irq_data *d)
+{
+	int nr = d->irq - INT_NUM_IRQ0;
+	u32 isr = LTQ_ICU_IM0_ISR + LTQ_ICU_OFFSET * (nr / INT_NUM_IM_OFFSET);
+	u32 mask = 1 << (nr % INT_NUM_IM_OFFSET);
+
+	ltq_icu_w32(mask, isr);
+}
+
+/* set the irq's bit in the enable register of the module it belongs to */
+void ltq_enable_irq(struct irq_data *d)
+{
+	int nr = d->irq - INT_NUM_IRQ0;
+	u32 ier = LTQ_ICU_IM0_IER + LTQ_ICU_OFFSET * (nr / INT_NUM_IM_OFFSET);
+	u32 val = ltq_icu_r32(ier);
+
+	ltq_icu_w32(val | (1 << (nr % INT_NUM_IM_OFFSET)), ier);
+}
+
+static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
+{
+	int i;
+
+	ltq_enable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		/* ltq_eiu_irq[] holds full irq numbers, just like d->irq */
+		if (d->irq == ltq_eiu_irq[i]) {
+			/* low level - we should really handle set_type */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) |
+				(0x6 << (i * 4)), LTQ_EIU_EXIN_C);
+			/* clear all pending */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INIC) & ~(1 << i),
+				LTQ_EIU_EXIN_INIC);
+			/* enable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) | (1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static void ltq_shutdown_eiu_irq(struct irq_data *d)
+{
+	int i;
+
+	ltq_disable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		/* ltq_eiu_irq[] holds full irq numbers, just like d->irq */
+		if (d->irq == ltq_eiu_irq[i]) {
+			/* disable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+}
+
+static struct irq_chip ltq_irq_type = {
+	"icu",
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static struct irq_chip ltq_eiu_type = {
+	"eiu",
+	.irq_startup = ltq_startup_eiu_irq,
+	.irq_shutdown = ltq_shutdown_eiu_irq,
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static void ltq_hw_irqdispatch(int module)
+{
+	u32 irq;
+
+	irq = ltq_icu_r32(LTQ_ICU_IM0_IOSR + (module * LTQ_ICU_OFFSET));
+	if (irq == 0)
+		return;
+
+	/* silicon bug causes only the msb set to 1 to be valid. all
+	 * other bits might be bogus
+	 */
+	irq = __fls(irq);
+	do_IRQ((int)irq + INT_NUM_IM0_IRL0 + (INT_NUM_IM_OFFSET * module));
+
+	/* if this is a EBU irq, we need to ack it or get a deadlock */
+	if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0))
+		ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10,
+			LTQ_EBU_PCC_ISTAT);
+}
+
+#define DEFINE_HWx_IRQDISPATCH(x)					\
+	static void ltq_hw ## x ## _irqdispatch(void)			\
+	{								\
+		ltq_hw_irqdispatch(x);					\
+	}
+DEFINE_HWx_IRQDISPATCH(0)
+DEFINE_HWx_IRQDISPATCH(1)
+DEFINE_HWx_IRQDISPATCH(2)
+DEFINE_HWx_IRQDISPATCH(3)
+DEFINE_HWx_IRQDISPATCH(4)
+
+static void ltq_hw5_irqdispatch(void)
+{
+	do_IRQ(MIPS_CPU_TIMER_IRQ);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+	unsigned int i;
+
+	/* IP7 is checked first and handed to the cpu timer irq */
+	if (pending & CAUSEF_IP7) {
+		do_IRQ(MIPS_CPU_TIMER_IRQ);
+		return;
+	}
+
+	/* IP2..IP6 feed icu modules 0..4, only the first hit is served */
+	for (i = 0; i < 5; i++) {
+		if (pending & (CAUSEF_IP2 << i)) {
+			ltq_hw_irqdispatch(i);
+			return;
+		}
+	}
+	/* print the cause register, which is what the message claims */
+	pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_cause());
+}
+
+static struct irqaction cascade = {
+	.handler = no_action,
+	.flags = IRQF_DISABLED,
+	.name = "cascade",
+};
+
+void __init arch_init_irq(void)
+{
+	int i;
+
+	if (insert_resource(&iomem_resource, &ltq_icu_resource) < 0)
+		panic("Failed to insert icu memory\n");
+
+	if (!request_mem_region(ltq_icu_resource.start,
+			resource_size(&ltq_icu_resource), "icu"))
+		panic("Failed to request icu memory\n");
+
+	ltq_icu_membase = ioremap_nocache(ltq_icu_resource.start,
+				resource_size(&ltq_icu_resource));
+	if (!ltq_icu_membase)
+		panic("Failed to remap icu memory\n");
+
+	if (insert_resource(&iomem_resource, &ltq_eiu_resource) < 0)
+		panic("Failed to insert eiu memory\n");
+
+	if (!request_mem_region(ltq_eiu_resource.start,
+			resource_size(&ltq_eiu_resource), "eiu"))
+		panic("Failed to request eiu memory\n");
+
+	ltq_eiu_membase = ioremap_nocache(ltq_eiu_resource.start,
+				resource_size(&ltq_eiu_resource));
+	if (!ltq_eiu_membase)
+		panic("Failed to remap eiu memory\n");
+
+	for (i = 0; i < 5; i++) {
+		/* make sure all irqs are turned off by default */
+		ltq_icu_w32(0, LTQ_ICU_IM0_IER + (i * LTQ_ICU_OFFSET));
+		/* clear all possibly pending interrupts */
+		ltq_icu_w32(~0, LTQ_ICU_IM0_ISR + (i * LTQ_ICU_OFFSET));
+	}
+
+	mips_cpu_irq_init();
+
+	for (i = 2; i <= 6; i++)
+		setup_irq(i, &cascade);
+
+	if (cpu_has_vint) {
+		pr_info("Setting up vectored interrupts\n");
+		set_vi_handler(2, ltq_hw0_irqdispatch);
+		set_vi_handler(3, ltq_hw1_irqdispatch);
+		set_vi_handler(4, ltq_hw2_irqdispatch);
+		set_vi_handler(5, ltq_hw3_irqdispatch);
+		set_vi_handler(6, ltq_hw4_irqdispatch);
+		set_vi_handler(7, ltq_hw5_irqdispatch);
+	}
+
+	for (i = INT_NUM_IRQ0;
+		i <= (INT_NUM_IRQ0 + (5 * INT_NUM_IM_OFFSET)); i++)
+		if ((i == LTQ_EIU_IR0) || (i == LTQ_EIU_IR1) ||
+			(i == LTQ_EIU_IR2))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		/* EIU3-5 only exist on ar9 and vr9 */
+		else if (((i == LTQ_EIU_IR3) || (i == LTQ_EIU_IR4) ||
+			(i == LTQ_EIU_IR5)) && (ltq_is_ar9() || ltq_is_vr9()))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &ltq_irq_type,
+				handle_level_irq);
+
+#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
+	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
+		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#else
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
+		IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#endif
+}
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return CP0_LEGACY_COMPARE_IRQ;
+}
diff --git a/arch/mips/lantiq/machtypes.h b/arch/mips/lantiq/machtypes.h
new file mode 100644
index 000000000000..7e01b8c484eb
--- /dev/null
+++ b/arch/mips/lantiq/machtypes.h
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_MACH_H__
+#define _LANTIQ_MACH_H__
+
+#include <asm/mips_machine.h>
+
+enum lantiq_mach_type {
+	LTQ_MACH_GENERIC = 0,
+	LTQ_MACH_EASY50712,	/* Danube evaluation board */
+	LTQ_MACH_EASY50601,	/* Amazon SE evaluation board */
+};
+
+#endif
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
new file mode 100644
index 000000000000..56ba007bf1e5
--- /dev/null
+++ b/arch/mips/lantiq/prom.c
@@ -0,0 +1,71 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq.h>
+
+#include "prom.h"
+#include "clk.h"
+
+static struct ltq_soc_info soc_info;
+
+unsigned int ltq_get_cpu_ver(void)
+{
+	return soc_info.rev;
+}
+EXPORT_SYMBOL(ltq_get_cpu_ver);
+
+unsigned int ltq_get_soc_type(void)
+{
+	return soc_info.type;
+}
+EXPORT_SYMBOL(ltq_get_soc_type);
+
+const char *get_system_type(void)
+{
+	return soc_info.sys_type;
+}
+
+void prom_free_prom_memory(void)
+{
+}
+
+/* append every non-empty firmware argument, plus a blank, to arcs_cmdline */
+static void __init prom_init_cmdline(void)
+{
+	char **argv = (char **) KSEG1ADDR(fw_arg1);
+	int n, argc = fw_arg0;
+
+	for (n = 0; n < argc; n++) {
+		char *arg = (char *) KSEG1ADDR(argv[n]);
+
+		if (!arg || !*arg)
+			continue;
+		strlcat(arcs_cmdline, arg, sizeof(arcs_cmdline));
+		strlcat(arcs_cmdline, " ", sizeof(arcs_cmdline));
+	}
+}
+
+void __init prom_init(void)
+{
+	struct clk *clk;
+
+	ltq_soc_detect(&soc_info);
+	clk_init();
+	clk = clk_get(0, "cpu");
+	snprintf(soc_info.sys_type, LTQ_SYS_TYPE_LEN - 1, "%s rev1.%d",
+		soc_info.name, soc_info.rev);
+	clk_put(clk);
+	soc_info.sys_type[LTQ_SYS_TYPE_LEN - 1] = '\0';
+	pr_info("SoC: %s\n", soc_info.sys_type);
+	prom_init_cmdline();
+}
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
new file mode 100644
index 000000000000..b4229d94280f
--- /dev/null
+++ b/arch/mips/lantiq/prom.h
@@ -0,0 +1,25 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PROM_H__
+#define _LTQ_PROM_H__
+
+#define LTQ_SYS_TYPE_LEN	0x100
+
+struct ltq_soc_info {
+	unsigned char *name;
+	unsigned int rev;
+	unsigned int partnum;
+	unsigned int type;
+	unsigned char sys_type[LTQ_SYS_TYPE_LEN];
+};
+
+extern void ltq_soc_detect(struct ltq_soc_info *i);
+extern void ltq_soc_setup(void);
+
+#endif
diff --git a/arch/mips/lantiq/setup.c b/arch/mips/lantiq/setup.c
new file mode 100644
index 000000000000..9b8af77ed0f9
--- /dev/null
+++ b/arch/mips/lantiq/setup.c
@@ -0,0 +1,66 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <asm/bootinfo.h>
+
+#include <lantiq_soc.h>
+
+#include "machtypes.h"
+#include "devices.h"
+#include "prom.h"
+
+void __init plat_mem_setup(void)
+{
+	/* assume 16M as default incase uboot fails to pass proper ramsize */
+	unsigned long memsize = 16;
+	char **envp = (char **) KSEG1ADDR(fw_arg2);
+
+	ioport_resource.start = IOPORT_RESOURCE_START;
+	ioport_resource.end = IOPORT_RESOURCE_END;
+	iomem_resource.start = IOMEM_RESOURCE_START;
+	iomem_resource.end = IOMEM_RESOURCE_END;
+
+	set_io_port_base((unsigned long) KSEG1);
+
+	while (*envp) {
+		char *e = (char *)KSEG1ADDR(*envp);
+		if (!strncmp(e, "memsize=", 8)) {
+			e += 8;
+			if (strict_strtoul(e, 0, &memsize))
+				pr_warn("bad memsize specified\n");
+		}
+		envp++;
+	}
+	memsize *= 1024 * 1024;
+	add_memory_region(0x00000000, memsize, BOOT_MEM_RAM);
+}
+
+static int __init
+lantiq_setup(void)
+{
+	ltq_soc_setup();
+	mips_machine_setup();
+	return 0;
+}
+
+arch_initcall(lantiq_setup);
+
+static void __init
+lantiq_generic_init(void)
+{
+	/* Nothing to do */
+}
+
+MIPS_MACHINE(LTQ_MACH_GENERIC,
+	     "Generic",
+	     "Generic Lantiq based board",
+	     lantiq_generic_init);
diff --git a/arch/mips/lantiq/xway/Kconfig b/arch/mips/lantiq/xway/Kconfig
new file mode 100644
index 000000000000..2b857de36620
--- /dev/null
+++ b/arch/mips/lantiq/xway/Kconfig
@@ -0,0 +1,23 @@
+if SOC_XWAY
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50712
+	bool "Easy50712 - Danube"
+	default y
+
+endmenu
+
+endif
+
+if SOC_AMAZON_SE
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50601
+	bool "Easy50601 - Amazon SE"
+	default y
+
+endmenu
+
+endif
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
new file mode 100644
index 000000000000..c517f2e77563
--- /dev/null
+++ b/arch/mips/lantiq/xway/Makefile
@@ -0,0 +1,7 @@
+obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o dma.o
+
+obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
+obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
+
+obj-$(CONFIG_LANTIQ_MACH_EASY50712) += mach-easy50712.o
+obj-$(CONFIG_LANTIQ_MACH_EASY50601) += mach-easy50601.o
diff --git a/arch/mips/lantiq/xway/clk-ase.c b/arch/mips/lantiq/xway/clk-ase.c
new file mode 100644
index 000000000000..22d823acd536
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-ase.c
@@ -0,0 +1,48 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+/* cgu registers */
+#define LTQ_CGU_SYS	0x0010
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+/* bit 5 of the cgu sys register selects 266M, otherwise 133M is reported */
+unsigned int ltq_get_cpu_hz(void)
+{
+	u32 sys = ltq_cgu_r32(LTQ_CGU_SYS);
+
+	return (sys & (1 << 5)) ? CLOCK_266M : CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/clk-xway.c b/arch/mips/lantiq/xway/clk-xway.c
new file mode 100644
index 000000000000..ddd39593c581
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-xway.c
@@ -0,0 +1,223 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+static unsigned int ltq_ram_clocks[] = {
+	CLOCK_167M, CLOCK_133M, CLOCK_111M, CLOCK_83M };
+#define DDR_HZ ltq_ram_clocks[ltq_cgu_r32(LTQ_CGU_SYS) & 0x3]
+
+#define BASIC_FREQUENCY_1	35328000
+#define BASIC_FREQUENCY_2	36000000
+#define BASIS_REQUENCY_USB	12000000
+
+#define GET_BITS(x, msb, lsb) \
+	(((x) & ((1 << ((msb) + 1)) - 1)) >> (lsb))
+
+#define LTQ_CGU_PLL0_CFG	0x0004
+#define LTQ_CGU_PLL1_CFG	0x0008
+#define LTQ_CGU_PLL2_CFG	0x000C
+#define LTQ_CGU_SYS		0x0010
+#define LTQ_CGU_UPDATE		0x0014
+#define LTQ_CGU_IF_CLK		0x0018
+#define LTQ_CGU_OSC_CON		0x001C
+#define LTQ_CGU_SMD		0x0020
+#define LTQ_CGU_CT1SR		0x0028
+#define LTQ_CGU_CT2SR		0x002C
+#define LTQ_CGU_PCMCR		0x0030
+#define LTQ_CGU_PCI_CR		0x0034
+#define LTQ_CGU_PD_PC		0x0038
+#define LTQ_CGU_FMR		0x003C
+
+#define CGU_PLL0_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 31))
+#define CGU_PLL0_BYPASS			\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 30))
+#define CGU_PLL0_CFG_DSMSEL		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 28))
+#define CGU_PLL0_CFG_FRAC_EN		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 27))
+#define CGU_PLL1_SRC			\
+	(ltq_cgu_r32(LTQ_CGU_PLL1_CFG) & (1 << 31))
+#define CGU_PLL2_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & (1 << 20))
+#define CGU_SYS_FPI_SEL			(1 << 6)
+#define CGU_SYS_DDR_SEL			0x3
+#define CGU_PLL0_SRC			(1 << 29)
+
+#define CGU_PLL0_CFG_PLLK	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 26, 17)
+#define CGU_PLL0_CFG_PLLN	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 12, 6)
+#define CGU_PLL0_CFG_PLLM	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 5, 2)
+#define CGU_PLL2_SRC		GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 18, 17)
+#define CGU_PLL2_CFG_INPUT_DIV	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 16, 13)
+
+static unsigned int ltq_get_pll0_fdiv(void);
+
+static inline unsigned int get_input_clock(int pll)
+{
+	switch (pll) {
+	case 0:
+		if (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & CGU_PLL0_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 1:
+		if (CGU_PLL1_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 2:
+		switch (CGU_PLL2_SRC) {
+		case 0:
+			return ltq_get_pll0_fdiv();
+		case 1:
+			return CGU_PLL2_PHASE_DIVIDER_ENABLE ?
+				BASIC_FREQUENCY_1 :
+				BASIC_FREQUENCY_2;
+		case 2:
+			return BASIS_REQUENCY_USB;
+		}
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned int cal_dsm(int pll, unsigned int num, unsigned int den)
+{
+	u64 res, clock = get_input_clock(pll);
+
+	res = num * clock;
+	do_div(res, den);
+	return res;
+}
+
+static inline unsigned int mash_dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 10) + K;
+	unsigned int den = (M + 1) << 10;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_1(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 11) + K + 512;
+	unsigned int den = (M + 1) << 11;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_2(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = K >= 512 ?
+		((N + 1) << 12) + K - 512 : ((N + 1) << 12) + K + 3584;
+	unsigned int den = (M + 1) << 12;
+
+	return cal_dsm(pll, num, den);
+}
+
+/* ssff_dsm_2() needs both dsmsel and phase_div_en, all else is mash_dsm() */
+static inline unsigned int dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K, unsigned int dsmsel, unsigned int phase_div_en)
+{
+	int use_ssff = dsmsel && phase_div_en;
+
+	if (use_ssff)
+		return ssff_dsm_2(pll, M, N, K);
+	return mash_dsm(pll, M, N, K);
+}
+
+static inline unsigned int ltq_get_pll0_fosc(void)
+{
+	if (CGU_PLL0_BYPASS)
+		return get_input_clock(0);
+	else
+		return !CGU_PLL0_CFG_FRAC_EN
+			? dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN, 0,
+				CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			: dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN,
+				CGU_PLL0_CFG_PLLK, CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE);
+}
+
+static unsigned int ltq_get_pll0_fdiv(void)
+{
+	unsigned int div = CGU_PLL2_CFG_INPUT_DIV + 1;
+
+	return (ltq_get_pll0_fosc() + (div >> 1)) / div;
+}
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	unsigned int ret = ltq_get_pll0_fosc();
+
+	switch (ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & CGU_SYS_DDR_SEL) {
+	default:
+	case 0:
+		return (ret + 1) / 2;
+	case 1:
+		return (ret * 2 + 2) / 5;
+	case 2:
+		return (ret + 1) / 3;
+	case 3:
+		return (ret + 2) / 4;
+	}
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	unsigned int ret = ltq_get_io_region_clock();
+
+	if ((fpi == 2) && (ltq_cgu_r32(LTQ_CGU_SYS) & CGU_SYS_FPI_SEL))
+		ret >>= 1;
+	return ret;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+unsigned int ltq_get_cpu_hz(void)
+{
+	switch (ltq_cgu_r32(LTQ_CGU_SYS) & 0xc) {
+	case 0:
+		return CLOCK_333M;
+	case 4:
+		return DDR_HZ;
+	case 8:
+		return DDR_HZ << 1;
+	default:
+		return DDR_HZ >> 1;
+	}
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	unsigned int hz = DDR_HZ;
+
+	if (ltq_cgu_r32(LTQ_CGU_SYS) & CGU_SYS_FPI_SEL)
+		hz >>= 1;	/* FPI_SEL set: half of the ddr clock */
+	return hz;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/devices.c b/arch/mips/lantiq/xway/devices.c
new file mode 100644
index 000000000000..e09e789dfc27
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.c
@@ -0,0 +1,121 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mtd/physmap.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "devices.h"
+
+/* gpio - one memory window per port, indexed by the port number */
+static struct resource ltq_gpio_resource[] = {
+	{
+		.name	= "gpio0",
+		.start  = LTQ_GPIO0_BASE_ADDR,
+		.end    = LTQ_GPIO0_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio1",
+		.start  = LTQ_GPIO1_BASE_ADDR,
+		.end    = LTQ_GPIO1_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio2",
+		.start  = LTQ_GPIO2_BASE_ADDR,
+		.end    = LTQ_GPIO2_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}
+};
+
+void __init ltq_register_gpio(void)
+{
+	int port;
+
+	/* ports 0 and 1 are registered unconditionally */
+	for (port = 0; port < 2; port++)
+		platform_device_register_simple("ltq_gpio", port,
+			&ltq_gpio_resource[port], 1);
+	/* AR9 and VR9 have an extra gpio block */
+	if (ltq_is_ar9() || ltq_is_vr9())
+		platform_device_register_simple("ltq_gpio", 2,
+			&ltq_gpio_resource[2], 1);
+}
+
+/* serial to parallel conversion (stp) - register window of the unit */
+static struct resource ltq_stp_resource = {
+	.name   = "stp",
+	.start  = LTQ_STP_BASE_ADDR,
+	.end    = LTQ_STP_BASE_ADDR + LTQ_STP_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_gpio_stp(void)
+{
+	platform_device_register_simple("ltq_stp", 0, &ltq_stp_resource, 1);	/* device "ltq_stp", id 0, one memory resource */
+}
+
+/* asc ports - amazon se has its own serial mapping */
+static struct resource ltq_ase_asc_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC1_BASE_ADDR,	/* the "asc0" entry uses the ASC1 base address */
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_ASE_TIR),
+	IRQ_RES(rx, LTQ_ASC_ASE_RIR),
+	IRQ_RES(err, LTQ_ASC_ASE_EIR),
+};
+
+void __init ltq_register_ase_asc(void)
+{
+	platform_device_register_simple("ltq_asc", 0,	/* memory window plus the tx, rx and err irqs */
+		ltq_ase_asc_resources, ARRAY_SIZE(ltq_ase_asc_resources));
+}
+
+/* ethernet - register window and platform device of the etop unit */
+static struct resource ltq_etop_resources = {
+	.name	= "etop",
+	.start	= LTQ_ETOP_BASE_ADDR,
+	.end	= LTQ_ETOP_BASE_ADDR + LTQ_ETOP_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_etop = {
+	.name		= "ltq_etop",
+	.resource	= &ltq_etop_resources,
+	.num_resources	= 1,	/* platform_data is filled in by ltq_register_etop() */
+};
+
+void __init
+ltq_register_etop(struct ltq_eth_data *eth)
+{
+	if (!eth)	/* without board data the device is not registered */
+		return;
+	ltq_etop.dev.platform_data = eth;
+	platform_device_register(&ltq_etop);
+}
diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h
new file mode 100644
index 000000000000..e90493471bc1
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.h
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_XWAY_H__
+#define _LTQ_DEVICES_XWAY_H__
+
+#include "../devices.h"
+#include <linux/phy.h>
+
+extern void ltq_register_gpio(void);	/* "ltq_gpio" ports 0 and 1, plus 2 on AR9/VR9 */
+extern void ltq_register_gpio_stp(void);	/* "ltq_stp" device 0 */
+extern void ltq_register_ase_asc(void);	/* "ltq_asc" device 0 */
+extern void ltq_register_etop(struct ltq_eth_data *eth);	/* "ltq_etop"; does nothing if eth is NULL */
+
+#endif
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
new file mode 100644
index 000000000000..4278a459d6c4
--- /dev/null
+++ b/arch/mips/lantiq/xway/dma.c
@@ -0,0 +1,253 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+
+#include <lantiq_soc.h>
+#include <xway_dma.h>
+
+#define LTQ_DMA_CTRL		0x10
+#define LTQ_DMA_CPOLL		0x14
+#define LTQ_DMA_CS		0x18
+#define LTQ_DMA_CCTRL		0x1C
+#define LTQ_DMA_CDBA		0x20
+#define LTQ_DMA_CDLEN		0x24
+#define LTQ_DMA_CIS		0x28
+#define LTQ_DMA_CIE		0x2C
+#define LTQ_DMA_PS		0x40
+#define LTQ_DMA_PCTRL		0x44
+#define LTQ_DMA_IRNEN		0xf4
+
+#define DMA_DESCPT		BIT(3)		/* descriptor complete irq */
+#define DMA_TX			BIT(8)		/* TX channel direction */
+#define DMA_CHAN_ON		BIT(0)		/* channel on / off bit */
+#define DMA_PDEN		BIT(6)		/* enable packet drop */
+#define DMA_CHAN_RST		BIT(1)		/* channel reset bit in CCTRL */
+#define DMA_RESET		BIT(0)		/* engine reset bit in CTRL */
+#define DMA_IRQ_ACK		0x7e		/* written to CIS to ack irqs */
+#define DMA_POLL		BIT(31)		/* turn on channel polling */
+#define DMA_CLK_DIV4		BIT(6)		/* polling clock divider */
+#define DMA_2W_BURST		BIT(1)		/* 2 word burst length */
+#define DMA_MAX_CHANNEL		20		/* the soc has 20 channels */
+#define DMA_ETOP_ENDIANESS	(0xf << 8) /* endianness swap etop channels */
+#define DMA_WEIGHT	(BIT(17) | BIT(16))	/* default channel weight */
+
+#define ltq_dma_r32(x)			ltq_r32(ltq_dma_membase + (x))
+#define ltq_dma_w32(x, y)		ltq_w32(x, ltq_dma_membase + (y))
+#define ltq_dma_w32_mask(x, y, z)	ltq_w32_mask(x, y, \
+						ltq_dma_membase + (z))
+
+static struct resource ltq_dma_resource = {
+	.name	= "dma",
+	.start	= LTQ_DMA_BASE_ADDR,
+	.end	= LTQ_DMA_BASE_ADDR + LTQ_DMA_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_dma_membase;
+
+void
+ltq_dma_enable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);	/* local irqs stay off across both register writes */
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);	/* set this channel's bit in IRNEN */
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_enable_irq);
+
+void
+ltq_dma_disable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);	/* local irqs stay off across both register writes */
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32_mask(1 << ch->nr, 0, LTQ_DMA_IRNEN);	/* clear this channel's bit in IRNEN */
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_disable_irq);
+
+void
+ltq_dma_ack_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);	/* local irqs stay off across both register writes */
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32(DMA_IRQ_ACK, LTQ_DMA_CIS);	/* write the ack mask (0x7e) to CIS */
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_ack_irq);
+
+void
+ltq_dma_open(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32_mask(0, DMA_CHAN_ON, LTQ_DMA_CCTRL);	/* set the channel on bit */
+	ltq_dma_enable_irq(ch);	/* saves/restores irqs again and rewrites CS itself */
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_open);
+
+void
+ltq_dma_close(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);	/* clear the channel on bit */
+	ltq_dma_disable_irq(ch);	/* saves/restores irqs again and rewrites CS itself */
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_close);
+
+static void
+ltq_dma_alloc(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ch->desc = 0;
+	ch->desc_base = dma_alloc_coherent(NULL,
+				LTQ_DESC_NUM * LTQ_DESC_SIZE,
+				&ch->phys, GFP_ATOMIC);
+	memset(ch->desc_base, 0, LTQ_DESC_NUM * LTQ_DESC_SIZE);	/* NOTE(review): desc_base is used without a NULL check */
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);	/* write the channel number to CS */
+	ltq_dma_w32(ch->phys, LTQ_DMA_CDBA);	/* bus address of the descriptor ring */
+	ltq_dma_w32(LTQ_DESC_NUM, LTQ_DMA_CDLEN);	/* number of descriptors */
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);	/* channel off */
+	wmb();
+	ltq_dma_w32_mask(0, DMA_CHAN_RST, LTQ_DMA_CCTRL);	/* set the reset bit ... */
+	while (ltq_dma_r32(LTQ_DMA_CCTRL) & DMA_CHAN_RST)	/* ... and spin until it reads back clear (no timeout) */
+		;
+	local_irq_restore(flags);
+}
+
+void
+ltq_dma_alloc_tx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);	/* allocates the ring, resets the channel, writes ch->nr to CS */
+
+	local_irq_save(flags);	/* NOTE(review): CS is not rewritten below; relies on the value left by ltq_dma_alloc() */
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);	/* CIE = descriptor complete irq only */
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);	/* set this channel's bit in IRNEN */
+	ltq_dma_w32(DMA_WEIGHT | DMA_TX, LTQ_DMA_CCTRL);	/* default weight, TX direction; plain write of CCTRL */
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_tx);
+
+void
+ltq_dma_alloc_rx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);	/* allocates the ring, resets the channel, writes ch->nr to CS */
+
+	local_irq_save(flags);	/* NOTE(review): CS is not rewritten below; relies on the value left by ltq_dma_alloc() */
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);	/* CIE = descriptor complete irq only */
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);	/* set this channel's bit in IRNEN */
+	ltq_dma_w32(DMA_WEIGHT, LTQ_DMA_CCTRL);	/* default weight, DMA_TX left clear; plain write of CCTRL */
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_rx);
+
+void ltq_dma_free(struct ltq_dma_channel *ch)
+{
+	if (!ch->desc_base)	/* nothing allocated, or already freed */
+		return;
+	ltq_dma_close(ch);	/* switch the channel and its irq off first */
+	dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE,
+		ch->desc_base, ch->phys);
+	ch->desc_base = NULL;	/* lets the guard above catch a repeated call */
+}
+EXPORT_SYMBOL_GPL(ltq_dma_free);
+
+void
+ltq_dma_init_port(int p)
+{
+	ltq_dma_w32(p, LTQ_DMA_PS);	/* write the port number to PS before touching PCTRL */
+	switch (p) {
+	case DMA_PORT_ETOP:
+		/*
+		 * Tell the DMA engine to swap the endianness of data frames and
+		 * drop packets if the channel arbitration fails.
+		 */
+		ltq_dma_w32_mask(0, DMA_ETOP_ENDIANESS | DMA_PDEN,
+			LTQ_DMA_PCTRL);
+		break;
+
+	case DMA_PORT_DEU:
+		ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),	/* plain write, not a read-modify-write as for ETOP */
+			LTQ_DMA_PCTRL);
+		break;
+
+	default:	/* other ports keep their PCTRL value */
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ltq_dma_init_port);
+
+int __init
+ltq_dma_init(void)
+{
+	int i;
+
+	/* insert the register window into the iomem tree, then claim it */
+	if (insert_resource(&iomem_resource, &ltq_dma_resource) < 0)
+		panic("Failed to insert dma memory\n");
+
+	if (!request_mem_region(ltq_dma_resource.start,	/* NULL on failure */
+			resource_size(&ltq_dma_resource), "dma"))
+		panic("Failed to request dma memory\n");
+
+	/* remap dma register range */
+	ltq_dma_membase = ioremap_nocache(ltq_dma_resource.start,
+				resource_size(&ltq_dma_resource));
+	if (!ltq_dma_membase)
+		panic("Failed to remap dma memory\n");
+
+	/* power up and reset the dma engine */
+	ltq_pmu_enable(PMU_DMA);
+	ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL);
+
+	/* disable all interrupts */
+	ltq_dma_w32(0, LTQ_DMA_IRNEN);
+
+	/* reset/configure each channel: reset, enable polling, leave it off */
+	for (i = 0; i < DMA_MAX_CHANNEL; i++) {
+		ltq_dma_w32(i, LTQ_DMA_CS);
+		ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL);
+		ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL);
+		ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	}
+	return 0;
+}
+
+postcore_initcall(ltq_dma_init);
diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c
new file mode 100644
index 000000000000..66eb52fa50a1
--- /dev/null
+++ b/arch/mips/lantiq/xway/ebu.c
@@ -0,0 +1,53 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  EBU - the external bus unit attaches PCI, NOR and NAND
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* all access to the ebu must be locked */
+DEFINE_SPINLOCK(ebu_lock);
+EXPORT_SYMBOL_GPL(ebu_lock);
+
+static struct resource ltq_ebu_resource = {
+	.name	= "ebu",
+	.start	= LTQ_EBU_BASE_ADDR,
+	.end	= LTQ_EBU_BASE_ADDR + LTQ_EBU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped base addr of the external bus unit */
+void __iomem *ltq_ebu_membase;
+
+static int __init lantiq_ebu_init(void)
+{
+	/* insert the register window into the iomem tree, then claim it */
+	if (insert_resource(&iomem_resource, &ltq_ebu_resource) < 0)
+		panic("Failed to insert ebu memory\n");
+
+	if (!request_mem_region(ltq_ebu_resource.start,	/* NULL on failure */
+			resource_size(&ltq_ebu_resource), "ebu"))
+		panic("Failed to request ebu memory\n");
+
+	/* remap ebu register range */
+	ltq_ebu_membase = ioremap_nocache(ltq_ebu_resource.start,
+				resource_size(&ltq_ebu_resource));
+	if (!ltq_ebu_membase)
+		panic("Failed to remap ebu memory\n");
+
+	/* make sure to unprotect the memory region where flash is located */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_BUSCON0) & ~EBU_WRDIS, LTQ_EBU_BUSCON0);
+	return 0;
+}
+
+postcore_initcall(lantiq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio.c b/arch/mips/lantiq/xway/gpio.c
new file mode 100644
index 000000000000..a321451a5455
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio.c
@@ -0,0 +1,195 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_GPIO_OUT		0x00
+#define LTQ_GPIO_IN		0x04
+#define LTQ_GPIO_DIR		0x08
+#define LTQ_GPIO_ALTSEL0	0x0C
+#define LTQ_GPIO_ALTSEL1	0x10
+#define LTQ_GPIO_OD		0x14
+
+#define PINS_PER_PORT		16
+#define MAX_PORTS		3
+
+#define ltq_gpio_getbit(m, r, p)	(!!(ltq_r32((m) + (r)) & (1 << (p))))	/* 0 or 1 */
+#define ltq_gpio_setbit(m, r, p)	ltq_w32_mask(0, (1 << (p)), (m) + (r))	/* set bit p */
+#define ltq_gpio_clearbit(m, r, p)	ltq_w32_mask((1 << (p)), 0, (m) + (r))	/* clear bit p */
+
+struct ltq_gpio {
+	void __iomem *membase;
+	struct gpio_chip chip;
+};
+
+static struct ltq_gpio ltq_gpio_port[MAX_PORTS];
+
+int gpio_to_irq(unsigned int gpio)
+{
+	return -EINVAL;	/* stub: fails for every gpio */
+}
+EXPORT_SYMBOL(gpio_to_irq);
+
+int irq_to_gpio(unsigned int gpio)
+{
+	return -EINVAL;	/* stub: fails for every argument (an irq number, despite the parameter name) */
+}
+EXPORT_SYMBOL(irq_to_gpio);
+
+int ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name)
+{
+	void __iomem *base;
+	unsigned int bit;
+
+	if (pin >= (MAX_PORTS * PINS_PER_PORT))
+		return -EINVAL;
+	if (gpio_request(pin, name)) {
+		pr_err("failed to setup lantiq gpio: %s\n", name);
+		return -EBUSY;
+	}
+	/* a non-zero dir makes the pin an output with initial value 1 */
+	if (dir)
+		gpio_direction_output(pin, 1);
+	else
+		gpio_direction_input(pin);
+
+	/* split the global pin number into port and bit within the port */
+	base = ltq_gpio_port[pin / PINS_PER_PORT].membase;
+	bit = pin % PINS_PER_PORT;
+
+	/* mirror alt0/alt1 into the pin's ALTSEL0/ALTSEL1 bits */
+	if (alt0)
+		ltq_gpio_setbit(base, LTQ_GPIO_ALTSEL0, bit);
+	else
+		ltq_gpio_clearbit(base, LTQ_GPIO_ALTSEL0, bit);
+	if (alt1)
+		ltq_gpio_setbit(base, LTQ_GPIO_ALTSEL1, bit);
+	else
+		ltq_gpio_clearbit(base, LTQ_GPIO_ALTSEL1, bit);
+	return 0;
+}
+EXPORT_SYMBOL(ltq_gpio_request);
+
+static void ltq_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct ltq_gpio *port = container_of(chip, struct ltq_gpio, chip);
+
+	if (!value)	/* zero clears the pin's OUT bit, anything else sets it */
+		ltq_gpio_clearbit(port->membase, LTQ_GPIO_OUT, offset);
+	else
+		ltq_gpio_setbit(port->membase, LTQ_GPIO_OUT, offset);
+}
+
+static int ltq_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	return ltq_gpio_getbit(ltq_gpio->membase, LTQ_GPIO_IN, offset);	/* 0 or 1, taken from the IN register */
+}
+
+static int ltq_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);	/* clear the pin's OD bit */
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);	/* clear the pin's DIR bit */
+
+	return 0;	/* always succeeds */
+}
+
+static int ltq_gpio_direction_output(struct gpio_chip *chip,
+	unsigned int offset, int value)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);	/* set the pin's OD bit */
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);	/* set the pin's DIR bit */
+	ltq_gpio_set(chip, offset, value);	/* the level is written after the direction change */
+
+	return 0;	/* always succeeds */
+}
+
+static int ltq_gpio_req(struct gpio_chip *chip, unsigned offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL0, offset);	/* gpio_chip .request hook: both ALTSEL bits */
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL1, offset);	/* of the pin are cleared */
+	return 0;
+}
+
+static int ltq_gpio_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct ltq_gpio *port;
+
+	if (pdev->id < 0 || pdev->id >= MAX_PORTS) {	/* id indexes ltq_gpio_port[] */
+		dev_err(&pdev->dev, "invalid gpio port %d\n", pdev->id);
+		return -EINVAL;
+	}
+	port = &ltq_gpio_port[pdev->id];	/* per port state, one slot per id */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory for gpio port %d\n",
+			pdev->id);
+		return -ENOENT;
+	}
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev,
+			"failed to request memory for gpio port %d\n",
+			pdev->id);
+		return -EBUSY;
+	}
+	port->membase = devm_ioremap_nocache(&pdev->dev,
+		res->start, resource_size(res));
+	if (!port->membase) {
+		dev_err(&pdev->dev, "failed to remap memory for gpio port %d\n",
+			pdev->id);
+		return -ENOMEM;
+	}
+	port->chip.label = "ltq_gpio";
+	port->chip.direction_input = ltq_gpio_direction_input;
+	port->chip.direction_output = ltq_gpio_direction_output;
+	port->chip.get = ltq_gpio_get;
+	port->chip.set = ltq_gpio_set;
+	port->chip.request = ltq_gpio_req;
+	port->chip.base = PINS_PER_PORT * pdev->id;	/* port n owns gpios 16n .. 16n+15 */
+	port->chip.ngpio = PINS_PER_PORT;
+	platform_set_drvdata(pdev, port);
+	return gpiochip_add(&port->chip);
+}
+
+static struct platform_driver
+ltq_gpio_driver = {
+	.probe = ltq_gpio_probe,
+	.driver = {
+		.name = "ltq_gpio",	/* matches the "ltq_gpio" platform devices */
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_gpio_init(void)
+{
+	int ret = platform_driver_register(&ltq_gpio_driver);
+
+	if (ret)	/* a failure is an error and needs a terminated log line */
+		pr_err("ltq_gpio : Error registering platform driver!\n");
+	return ret;
+}
+
+postcore_initcall(ltq_gpio_init);
diff --git a/arch/mips/lantiq/xway/gpio_ebu.c b/arch/mips/lantiq/xway/gpio_ebu.c
new file mode 100644
index 000000000000..a479355abdb9
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_ebu.c
@@ -0,0 +1,126 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+/*
+ * By attaching hardware latches to the EBU it is possible to create output
+ * only gpios. This driver configures a special memory address, which when
+ * written to outputs 16 bit to the latches.
+ */
+
+#define LTQ_EBU_BUSCON	0x1e7ff		/* 16 bit access, slowest timing */
+#define LTQ_EBU_WP	0x80000000	/* write protect bit */
+
+/* we keep a shadow value of the last value written to the ebu */
+static int ltq_ebu_gpio_shadow = 0x0;
+static void __iomem *ltq_ebu_gpio_membase;
+
+static void ltq_ebu_apply(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+	ltq_ebu_w32(LTQ_EBU_BUSCON, LTQ_EBU_BUSCON1);	/* BUSCON1 without the write protect bit */
+	__raw_writew(ltq_ebu_gpio_shadow, ltq_ebu_gpio_membase);	/* 16 bit MMIO store of the shadow value */
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);	/* write protect again */
+	spin_unlock_irqrestore(&ebu_lock, flags);
+}
+
+static void ltq_ebu_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	const int bit = 1 << offset;
+
+	ltq_ebu_gpio_shadow = value ? (ltq_ebu_gpio_shadow | bit)
+				    : (ltq_ebu_gpio_shadow & ~bit);
+	ltq_ebu_apply();	/* write the updated shadow value out */
+}
+
+static int ltq_ebu_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_ebu_set(chip, offset, value);	/* no direction to program here; only the level is written */
+
+	return 0;
+}
+
+static struct gpio_chip ltq_ebu_chip = {
+	.label = "ltq_ebu",
+	.direction_output = ltq_ebu_direction_output,	/* no .get or .direction_input callback is set */
+	.set = ltq_ebu_set,
+	.base = 72,	/* with ngpio = 16: gpio numbers 72..87 */
+	.ngpio = 16,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_ebu_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory resource\n");
+		return -ENOENT;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request memory resource\n");
+		return -EBUSY;
+	}
+
+	ltq_ebu_gpio_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_ebu_gpio_membase) {
+		dev_err(&pdev->dev, "Failed to ioremap mem region\n");
+		return -ENOMEM;
+	}
+
+	/* grab the default shadow value passed from the platform code */
+	ltq_ebu_gpio_shadow = (unsigned int) pdev->dev.platform_data;	/* the pointer itself carries the value */
+
+	/* tell the ebu controller which memory address we will be using */
+	ltq_ebu_w32(pdev->resource->start | 0x1, LTQ_EBU_ADDRSEL1);
+
+	/* write protect the region */
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);
+
+	ret = gpiochip_add(&ltq_ebu_chip);
+	if (!ret)
+		ltq_ebu_apply();	/* push the initial shadow value out once the chip is registered */
+	return ret;
+}
+
+static struct platform_driver ltq_ebu_driver = {
+	.probe = ltq_ebu_probe,
+	.driver = {
+		.name = "ltq_ebu",	/* platform devices are matched by this name */
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init ltq_ebu_init(void)
+{
+	int ret = platform_driver_register(&ltq_ebu_driver);
+
+	if (ret)	/* a failure is an error and needs a terminated log line */
+		pr_err("ltq_ebu : Error registering platform driver!\n");
+	return ret;
+}
+
+postcore_initcall(ltq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio_stp.c b/arch/mips/lantiq/xway/gpio_stp.c
new file mode 100644
index 000000000000..67d59d690340
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_stp.c
@@ -0,0 +1,157 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2007 John Crispin <blogic@openwrt.org>
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_STP_CON0		0x00
+#define LTQ_STP_CON1		0x04
+#define LTQ_STP_CPU0		0x08
+#define LTQ_STP_CPU1		0x0C
+#define LTQ_STP_AR		0x10
+
+#define LTQ_STP_CON_SWU		(1U << 31)	/* unsigned: bit 31 does not fit a signed int */
+#define LTQ_STP_2HZ		0
+#define LTQ_STP_4HZ		(1 << 23)
+#define LTQ_STP_8HZ		(2 << 23)
+#define LTQ_STP_10HZ		(3 << 23)
+#define LTQ_STP_SPEED_MASK	(0xf << 23)
+#define LTQ_STP_UPD_FPI		(1U << 31)	/* unsigned, see LTQ_STP_CON_SWU */
+#define LTQ_STP_UPD_MASK	(3U << 30)	/* unsigned: covers bits 31:30 */
+#define LTQ_STP_ADSL_SRC	(3 << 24)
+
+#define LTQ_STP_GROUP0		(1 << 0)
+
+#define LTQ_STP_RISING		0
+#define LTQ_STP_FALLING		(1 << 26)
+#define LTQ_STP_EDGE_MASK	(1 << 26)
+
+#define ltq_stp_r32(reg)	__raw_readl(ltq_stp_membase + (reg))	/* read register at offset reg */
+#define ltq_stp_w32(val, reg)	__raw_writel(val, ltq_stp_membase + (reg))	/* write val to offset reg */
+#define ltq_stp_w32_mask(clear, set, reg) \
+		ltq_w32((ltq_r32(ltq_stp_membase + (reg)) & ~(clear)) | (set), \
+		ltq_stp_membase + (reg))
+
+static int ltq_stp_shadow = 0xffff;
+static void __iomem *ltq_stp_membase;
+
+static void ltq_stp_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	const int bit = 1 << offset;
+
+	ltq_stp_shadow = value ? (ltq_stp_shadow | bit)
+			       : (ltq_stp_shadow & ~bit);
+	ltq_stp_w32(ltq_stp_shadow, LTQ_STP_CPU0);	/* whole shadow word goes to CPU0 */
+}
+
+static int ltq_stp_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_stp_set(chip, offset, value);	/* no direction to program here; only the level is written */
+
+	return 0;
+}
+
+static struct gpio_chip ltq_stp_chip = {
+	.label = "ltq_stp",
+	.direction_output = ltq_stp_direction_output,	/* no .get or .direction_input callback is set */
+	.set = ltq_stp_set,
+	.base = 48,	/* with ngpio = 24: gpio numbers 48..71 */
+	.ngpio = 24,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_stp_hw_init(void)
+{
+	/* the 3 pins used to control the external stp (return values are ignored) */
+	ltq_gpio_request(4, 1, 0, 1, "stp-st");
+	ltq_gpio_request(5, 1, 0, 1, "stp-d");
+	ltq_gpio_request(6, 1, 0, 1, "stp-sh");
+
+	/* sane defaults */
+	ltq_stp_w32(0, LTQ_STP_AR);
+	ltq_stp_w32(0, LTQ_STP_CPU0);
+	ltq_stp_w32(0, LTQ_STP_CPU1);
+	ltq_stp_w32(LTQ_STP_CON_SWU, LTQ_STP_CON0);
+	ltq_stp_w32(0, LTQ_STP_CON1);
+
+	/* rising or falling edge - the falling edge is selected here */
+	ltq_stp_w32_mask(LTQ_STP_EDGE_MASK, LTQ_STP_FALLING, LTQ_STP_CON0);
+
+	/* per default stp 15-0 are set */
+	ltq_stp_w32_mask(0, LTQ_STP_GROUP0, LTQ_STP_CON1);
+
+	/* stp are updated periodically by the FPI bus */
+	ltq_stp_w32_mask(LTQ_STP_UPD_MASK, LTQ_STP_UPD_FPI, LTQ_STP_CON1);
+
+	/* set stp update speed */
+	ltq_stp_w32_mask(LTQ_STP_SPEED_MASK, LTQ_STP_8HZ, LTQ_STP_CON1);
+
+	/* tell the hardware that pin (led) 0 and 1 are controlled
+	 *  by the dsl arc
+	 */
+	ltq_stp_w32_mask(0, LTQ_STP_ADSL_SRC, LTQ_STP_CON0);
+
+	ltq_pmu_enable(PMU_LED);	/* NOTE(review): enabled only after the register writes above - confirm they stick */
+	return 0;
+}
+
+static int __devinit ltq_stp_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int ret = 0;
+
+	if (!res)	/* device was registered without a memory resource */
+		return -ENOENT;
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request STP memory\n");
+		return -EBUSY;
+	}
+	ltq_stp_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_stp_membase) {
+		dev_err(&pdev->dev, "failed to remap STP memory\n");
+		return -ENOMEM;
+	}
+	ret = gpiochip_add(&ltq_stp_chip);
+	if (!ret)	/* the hardware is only set up once the chip is registered */
+		ret = ltq_stp_hw_init();
+
+	return ret;
+}
+
+static struct platform_driver ltq_stp_driver = {
+	.probe = ltq_stp_probe,
+	.driver = {
+		.name = "ltq_stp",	/* platform devices are matched by this name */
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_stp_init(void)
+{
+	int ret = platform_driver_register(&ltq_stp_driver);
+
+	if (ret)	/* a failure is an error and needs a terminated log line */
+		pr_err("ltq_stp: error registering platform driver\n");
+	return ret;
+}
+
+postcore_initcall(ltq_stp_init);
diff --git a/arch/mips/lantiq/xway/mach-easy50601.c b/arch/mips/lantiq/xway/mach-easy50601.c
new file mode 100644
index 000000000000..d5aaf637ab19
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50601.c
@@ -0,0 +1,57 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+
+#include <lantiq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50601_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,	/* 64 KiB */
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,	/* 64 KiB */
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xE0000,	/* 896 KiB, ends at 0x100000 */
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,	/* 3 MiB, ends at 0x400000 */
+	},
+};
+
+static struct physmap_flash_data easy50601_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50601_partitions),
+	.parts		= easy50601_partitions,
+};
+
+static void __init easy50601_init(void)
+{
+	ltq_register_nor(&easy50601_flash_data);	/* the NOR flash is the only device this board registers */
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50601,
+			"EASY50601",
+			"EASY50601 Eval Board",
+			easy50601_init);
diff --git a/arch/mips/lantiq/xway/mach-easy50712.c b/arch/mips/lantiq/xway/mach-easy50712.c
new file mode 100644
index 000000000000..ea5027b3239d
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50712.c
@@ -0,0 +1,74 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+#include <linux/phy.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50712_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,	/* 64 KiB */
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,	/* 64 KiB */
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xe0000,	/* 896 KiB, ends at 0x100000 */
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,	/* 3 MiB, ends at 0x400000 */
+	},
+};
+
+static struct physmap_flash_data easy50712_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50712_partitions),
+	.parts		= easy50712_partitions,
+};
+
+static struct ltq_pci_data ltq_pci_data = {
+	.clock	= PCI_CLOCK_INT,
+	.gpio	= PCI_GNT1 | PCI_REQ1,
+	.irq	= {
+		[14] = INT_NUM_IM0_IRL0 + 22,
+	},
+};
+
+static struct ltq_eth_data ltq_eth_data = {
+	.mii_mode = PHY_INTERFACE_MODE_MII,
+};
+
+static void __init easy50712_init(void)
+{
+	ltq_register_gpio_stp();	/* "ltq_stp" device */
+	ltq_register_nor(&easy50712_flash_data);	/* NOR flash with the partition table above */
+	ltq_register_pci(&ltq_pci_data);	/* PCI with the board's clock, gpio and irq settings */
+	ltq_register_etop(&ltq_eth_data);	/* ethernet, MII mode */
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50712,
+	     "EASY50712",
+	     "EASY50712 Eval Board",
+	      easy50712_init);
diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c
new file mode 100644
index 000000000000..9d69f01e352b
--- /dev/null
+++ b/arch/mips/lantiq/xway/pmu.c
@@ -0,0 +1,70 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* PMU - the power management unit allows us to turn part of the core
+ * on and off
+ */
+
+/* the enable / disable registers */
+#define LTQ_PMU_PWDCR	0x1C
+#define LTQ_PMU_PWDSR	0x20
+
+#define ltq_pmu_w32(x, y)	ltq_w32((x), ltq_pmu_membase + (y))
+#define ltq_pmu_r32(x)		ltq_r32(ltq_pmu_membase + (x))
+
+static struct resource ltq_pmu_resource = {
+	.name	= "pmu",
+	.start	= LTQ_PMU_BASE_ADDR,
+	.end	= LTQ_PMU_BASE_ADDR + LTQ_PMU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_pmu_membase;
+
+void ltq_pmu_enable(unsigned int module)
+{
+	int retries = 1000000;
+
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) & ~module, LTQ_PMU_PWDCR);
+	while (--retries && (ltq_pmu_r32(LTQ_PMU_PWDSR) & module))
+		;	/* poll PWDSR until the module bits read back clear */
+	if (!retries)
+		panic("activating PMU module failed!\n");
+}
+EXPORT_SYMBOL(ltq_pmu_enable);
+
+void ltq_pmu_disable(unsigned int module)
+{
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) | module, LTQ_PMU_PWDCR);	/* set the module bits in PWDCR; PWDSR is not polled here */
+}
+EXPORT_SYMBOL(ltq_pmu_disable);
+
+int __init ltq_pmu_init(void)
+{
+	if (insert_resource(&iomem_resource, &ltq_pmu_resource) < 0)	/* negative on failure */
+		panic("Failed to insert pmu memory\n");
+
+	if (!request_mem_region(ltq_pmu_resource.start,	/* NULL on failure */
+			resource_size(&ltq_pmu_resource), "pmu"))
+		panic("Failed to request pmu memory\n");
+
+	ltq_pmu_membase = ioremap_nocache(ltq_pmu_resource.start,	/* base used by ltq_pmu_r32/w32 */
+				resource_size(&ltq_pmu_resource));
+	if (!ltq_pmu_membase)
+		panic("Failed to remap pmu memory\n");
+	return 0;
+}
+
+core_initcall(ltq_pmu_init);
diff --git a/arch/mips/lantiq/xway/prom-ase.c b/arch/mips/lantiq/xway/prom-ase.c
new file mode 100644
index 000000000000..abe49f4db57f
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-ase.c
@@ -0,0 +1,39 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_AMAZON_SE	"Amazon_SE"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_AMAZON_SE:
+		i->name = SOC_AMAZON_SE;
+		i->type = SOC_TYPE_AMAZON_SE;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/prom-xway.c b/arch/mips/lantiq/xway/prom-xway.c
new file mode 100644
index 000000000000..1686692ac24d
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-xway.c
@@ -0,0 +1,54 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_DANUBE	"Danube"
+#define SOC_TWINPASS	"Twinpass"
+#define SOC_AR9		"AR9"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_DANUBE1:
+	case SOC_ID_DANUBE2:
+		i->name = SOC_DANUBE;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_TWINPASS:
+		i->name = SOC_TWINPASS;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_ARX188:
+	case SOC_ID_ARX168:
+	case SOC_ID_ARX182:
+		i->name = SOC_AR9;
+		i->type = SOC_TYPE_AR9;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/reset.c b/arch/mips/lantiq/xway/reset.c
new file mode 100644
index 000000000000..a1be36d0e490
--- /dev/null
+++ b/arch/mips/lantiq/xway/reset.c
@@ -0,0 +1,91 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/pm.h>
+#include <linux/module.h>
+#include <asm/reboot.h>
+
+#include <lantiq_soc.h>
+
+#define ltq_rcu_w32(x, y)	ltq_w32((x), ltq_rcu_membase + (y))
+#define ltq_rcu_r32(x)		ltq_r32(ltq_rcu_membase + (x))
+
+/* register definitions */
+#define LTQ_RCU_RST		0x0010
+#define LTQ_RCU_RST_ALL		0x40000000
+
+#define LTQ_RCU_RST_STAT	0x0014
+#define LTQ_RCU_STAT_SHIFT	26
+
+static struct resource ltq_rcu_resource = {
+	.name   = "rcu",
+	.start  = LTQ_RCU_BASE_ADDR,
+	.end    = LTQ_RCU_BASE_ADDR + LTQ_RCU_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+/* remapped base addr of the reset control unit */
+static void __iomem *ltq_rcu_membase;
+
+/* This function is used by the watchdog driver */
+int ltq_reset_cause(void)
+{
+	u32 val = ltq_rcu_r32(LTQ_RCU_RST_STAT);
+	return val >> LTQ_RCU_STAT_SHIFT;
+}
+EXPORT_SYMBOL_GPL(ltq_reset_cause);
+
+static void ltq_machine_restart(char *command)
+{
+	pr_notice("System restart\n");
+	local_irq_disable();
+	ltq_rcu_w32(ltq_rcu_r32(LTQ_RCU_RST) | LTQ_RCU_RST_ALL, LTQ_RCU_RST);
+	unreachable();
+}
+
+static void ltq_machine_halt(void)
+{
+	pr_notice("System halted.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static void ltq_machine_power_off(void)
+{
+	pr_notice("Please turn off the power now.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static int __init mips_reboot_setup(void)
+{
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_rcu_resource) < 0)
+		panic("Failed to insert rcu memory\n");
+	/* request_mem_region() returns a pointer; NULL signals failure */
+	if (!request_mem_region(ltq_rcu_resource.start,
+			resource_size(&ltq_rcu_resource), "rcu"))
+		panic("Failed to request rcu memory\n");
+
+	/* remap rcu register range */
+	ltq_rcu_membase = ioremap_nocache(ltq_rcu_resource.start,
+				resource_size(&ltq_rcu_resource));
+	if (!ltq_rcu_membase)
+		panic("Failed to remap rcu memory\n");
+	/* install the SoC restart/halt/power-off handlers */
+	_machine_restart = ltq_machine_restart;
+	_machine_halt = ltq_machine_halt;
+	pm_power_off = ltq_machine_power_off;
+
+	return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/lantiq/xway/setup-ase.c b/arch/mips/lantiq/xway/setup-ase.c
new file mode 100644
index 000000000000..f6f326798a39
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-ase.c
@@ -0,0 +1,19 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_ase_asc();
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
diff --git a/arch/mips/lantiq/xway/setup-xway.c b/arch/mips/lantiq/xway/setup-xway.c
new file mode 100644
index 000000000000..c292f643a858
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-xway.c
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_asc(0);
+	ltq_register_asc(1);
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 2adead5a8a37..b2cad4fd5fc4 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
 obj-$(CONFIG_CPU_TX49XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_VR41XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= dump_tlb.o
+obj-$(CONFIG_CPU_XLR)		+= dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
 obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
diff --git a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
index 8c807c965199..0cb1b9760e34 100644
--- a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
@@ -201,8 +201,6 @@ static struct clocksource clocksource_mfgpt = {
 	.rating = 120, /* Functional for real use, but not desired */
 	.read = mfgpt_read,
 	.mask = CLOCKSOURCE_MASK(32),
-	.mult = 0,
-	.shift = 22,
 };
 
 int __init init_mfgpt_clocksource(void)
@@ -210,8 +208,7 @@ int __init init_mfgpt_clocksource(void)
 	if (num_possible_cpus() > 1)	/* MFGPT does not scale! */
 		return 0;
 
-	clocksource_mfgpt.mult = clocksource_hz2mult(MFGPT_TICK_RATE, 22);
-	return clocksource_register(&clocksource_mfgpt);
+	return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE);
 }
 
 arch_initcall(init_mfgpt_clocksource);
diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c
index 11b193f848f8..d93830ad6113 100644
--- a/arch/mips/loongson/common/env.c
+++ b/arch/mips/loongson/common/env.c
@@ -29,9 +29,10 @@ unsigned long memsize, highmemsize;
 
 #define parse_even_earlier(res, option, p)				\
 do {									\
-	int ret;							\
+	unsigned int tmp __maybe_unused;				\
+									\
 	if (strncmp(option, (char *)p, strlen(option)) == 0)		\
-		ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \
+		tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \
 } while (0)
 
 void __init prom_init_env(void)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index d679c772d082..4d8c1623eee2 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,7 +3,8 @@
 #
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   init.o tlbex.o tlbex-fault.o uasm.o page.o
+				   init.o mmap.o tlbex.o tlbex-fault.o uasm.o \
+				   page.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
@@ -29,6 +30,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o tlb-r3k.o
 obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= c-octeon.o cex-oct.o tlb-r4k.o
+obj-$(CONFIG_CPU_XLR)		+= c-r4k.o tlb-r4k.o cex-gen.o
 
 obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
 obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index b4923a75cb4b..d9bc5d3593b6 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1006,6 +1006,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_25KF:
 	case CPU_SB1:
 	case CPU_SB1A:
+	case CPU_XLR:
 		c->dcache.flags |= MIPS_CACHE_PINDEX;
 		break;
 
@@ -1075,7 +1076,6 @@ static int __cpuinit probe_scache(void)
 	unsigned long flags, addr, begin, end, pow2;
 	unsigned int config = read_c0_config();
 	struct cpuinfo_mips *c = &current_cpu_data;
-	int tmp;
 
 	if (config & CONF_SC)
 		return 0;
@@ -1108,7 +1108,6 @@ static int __cpuinit probe_scache(void)
 
 	/* Now search for the wrap around point. */
 	pow2 = (128 * 1024);
-	tmp = 0;
 	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {
 		cache_op(Index_Load_Tag_SD, addr);
 		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
new file mode 100644
index 000000000000..ae3c20a9556e
--- /dev/null
+++ b/arch/mips/mm/mmap.c
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+
+unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
+
+EXPORT_SYMBOL(shm_align_mask);
+
+#define COLOUR_ALIGN(addr,pgoff)				\
+	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
+	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+	unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_area_struct * vmm;
+	int do_color_align;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		/* Even MAP_FIXED mappings must reside within TASK_SIZE.  */
+		if (TASK_SIZE - len < addr)
+			return -EINVAL;
+
+		/*
+		 * We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+	if (addr) {
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+		vmm = find_vma(current->mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vmm || addr + len <= vmm->vm_start))
+			return addr;
+	}
+	addr = current->mm->mmap_base;
+	if (do_color_align)
+		addr = COLOUR_ALIGN(addr, pgoff);
+	else
+		addr = PAGE_ALIGN(addr);
+
+	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+		/* At this point:  (!vmm || addr < vmm->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vmm || addr + len <= vmm->vm_start)
+			return addr;
+		addr = vmm->vm_end;
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+	}
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE) {
+		random_factor = get_random_int();
+		random_factor = random_factor << PAGE_SHIFT;
+		if (TASK_IS_32BIT_ADDR)
+			random_factor &= 0xfffffful;
+		else
+			random_factor &= 0xffffffful;
+	}
+
+	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+	unsigned long rnd = get_random_int();
+
+	rnd = rnd << PAGE_SHIFT;
+	/* 8MB for 32bit, 256MB for 64bit */
+	if (TASK_IS_32BIT_ADDR)
+		rnd = rnd & 0x7ffffful;
+	else
+		rnd = rnd & 0xffffffful;
+
+	return rnd;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long base = mm->brk;
+	unsigned long ret;
+
+	ret = PAGE_ALIGN(base + brk_rnd());
+
+	if (ret < mm->brk)
+		return mm->brk;
+
+	return ret;
+}
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 5ef294fbb6e7..424ed4b92e6d 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -404,6 +404,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_5KC:
 	case CPU_TX49XX:
 	case CPU_PR4450:
+	case CPU_XLR:
 		uasm_i_nop(p);
 		tlbw(p);
 		break;
@@ -1151,8 +1152,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	struct uasm_reloc *r = relocs;
 	u32 *f;
 	unsigned int final_len;
-	struct mips_huge_tlb_info htlb_info;
-	enum vmalloc64_mode vmalloc_mode;
+	struct mips_huge_tlb_info htlb_info __maybe_unused;
+	enum vmalloc64_mode vmalloc_mode __maybe_unused;
 
 	memset(tlb_handler, 0, sizeof(tlb_handler));
 	memset(labels, 0, sizeof(labels));
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 414f0c99b196..31180c321a1a 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops;
 
 void __init prom_init(void)
 {
-	int result;
-
 	prom_argc = fw_arg0;
 	_prom_argv = (int *) fw_arg1;
 	_prom_envp = (int *) fw_arg2;
@@ -360,20 +358,14 @@ void __init prom_init(void)
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 	console_config();
 #endif
-	/* Early detection of CMP support */
-	result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
-
 #ifdef CONFIG_MIPS_CMP
-	if (result)
+	/* Early detection of CMP support */
+	if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ))
 		register_smp_ops(&cmp_smp_ops);
+	else
 #endif
 #ifdef CONFIG_MIPS_MT_SMP
-#ifdef CONFIG_MIPS_CMP
-	if (!result)
 		register_smp_ops(&vsmp_smp_ops);
-#else
-	register_smp_ops(&vsmp_smp_ops);
-#endif
 #endif
 #ifdef CONFIG_MIPS_MT_SMTC
 	register_smp_ops(&msmtc_smp_ops);
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 9027061f0ead..1d36c511a7a5 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock);
 static inline int mips_pcibios_iack(void)
 {
 	int irq;
-	u32 dummy;
 
 	/*
 	 * Determine highest priority pending interrupt by performing
@@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void)
 		BONITO_PCIMAP_CFG = 0x20000;
 
 		/* Flush Bonito register block */
-		dummy = BONITO_PCIMAP_CFG;
+		(void) BONITO_PCIMAP_CFG;
 		iob();    /* sync */
 
 		irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg);
@@ -309,6 +308,8 @@ static void ipi_call_dispatch(void)
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+	scheduler_ipi();
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
new file mode 100644
index 000000000000..a5ca743613f2
--- /dev/null
+++ b/arch/mips/netlogic/Kconfig
@@ -0,0 +1,5 @@
+config NLM_COMMON
+	bool
+
+config NLM_XLR
+	bool
diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile
new file mode 100644
index 000000000000..9bd3f731f62e
--- /dev/null
+++ b/arch/mips/netlogic/xlr/Makefile
@@ -0,0 +1,5 @@
+obj-y				+= setup.o platform.o irq.o time.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_EARLY_PRINTK)	+= xlr_console.o
+
+EXTRA_CFLAGS			+= -Werror
diff --git a/arch/mips/netlogic/xlr/irq.c b/arch/mips/netlogic/xlr/irq.c
new file mode 100644
index 000000000000..1446d58e364c
--- /dev/null
+++ b/arch/mips/netlogic/xlr/irq.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+#include <asm/mipsregs.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+static u64 nlm_irq_mask;
+static DEFINE_SPINLOCK(nlm_pic_lock);
+
+static void xlr_pic_enable(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  reg | (1 << 6) | (1 << 30) | (1 << 31));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+static void xlr_pic_mask(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+	/* Clear bit 31 (set by xlr_pic_enable); OR-ing in 0 << 31 is a no-op */
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  (reg & ~((nlm_reg_t)1 << 31)) | (1 << 6) | (1 << 30));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+#ifdef CONFIG_PCI
+/* Extra ACK needed for XLR on chip PCI controller */
+static void xlr_pci_ack(struct irq_data *d)
+{
+	nlm_reg_t *pci_mmio = netlogic_io_mmio(NETLOGIC_IO_PCIX_OFFSET);
+
+	netlogic_read_reg(pci_mmio, (0x140 >> 2));
+}
+
+/* Extra ACK needed for XLS on chip PCIe controller */
+static void xls_pcie_ack(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+
+/* For XLS B silicon, the 3,4 PCI interrupts are different */
+static void xls_pcie_ack_b(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+#endif
+
+static void xlr_pic_ack(struct irq_data *d)
+{
+	unsigned long flags;
+	nlm_reg_t *mmio;
+	int irq = d->irq;
+	void *hd = irq_data_get_irq_handler_data(d);
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+	/* Handler data, when set, is an extra controller-specific ack hook */
+	if (hd) {
+		void (*extra_ack)(struct irq_data *) = hd;
+		extra_ack(d);
+	}
+	mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	netlogic_write_reg(mmio, PIC_INT_ACK, (1U << (irq - PIC_IRQ_BASE)));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+/*
+ * This chip definition handles interrupts routed through the XLR
+ * hardware PIC. Currently IRQs 8-39 are mapped to hardware interrupts
+ * 0-31 wired to the XLR PIC.
+ */
+static struct irq_chip xlr_pic = {
+	.name		= "XLR-PIC",
+	.irq_enable	= xlr_pic_enable,
+	.irq_mask	= xlr_pic_mask,
+	.irq_ack	= xlr_pic_ack,
+};
+
+static void rsvd_irq_handler(struct irq_data *d)
+{
+	WARN(d->irq >= PIC_IRQ_BASE, "Bad irq %d", d->irq);
+}
+
+/*
+ * Chip definition for CPU originated interrupts(timer, msg) and
+ * IPIs
+ */
+struct irq_chip nlm_cpu_intr = {
+	.name		= "XLR-CPU-INTR",
+	.irq_enable	= rsvd_irq_handler,
+	.irq_mask	= rsvd_irq_handler,
+	.irq_ack	= rsvd_irq_handler,
+};
+
+void __init init_xlr_irqs(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	uint32_t thread_mask = 1;
+	int level, i;
+
+	pr_info("Interrupt thread mask [%x]\n", thread_mask);
+	for (i = 0; i < PIC_NUM_IRTS; i++) {
+		level = PIC_IRQ_IS_EDGE_TRIGGERED(i);
+
+		/* Bind all PIC irqs to boot cpu */
+		netlogic_write_reg(mmio, PIC_IRT_0_BASE + i, thread_mask);
+
+		/*
+		 * Use local scheduling and high polarity for all IRTs
+		 * Invalidate all IRTs, by default
+		 */
+		netlogic_write_reg(mmio, PIC_IRT_1_BASE + i,
+				(level << 30) | (1 << 6) | (PIC_IRQ_BASE + i));
+	}
+
+	/* Mark all IRQs as level triggered by default */
+	for (i = 0; i < NR_IRQS; i++) {
+		if (PIC_IRQ_IS_IRT(i))
+			irq_set_chip_and_handler(i, &xlr_pic, handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &nlm_cpu_intr,
+						handle_level_irq);
+	}
+#ifdef CONFIG_SMP
+	irq_set_chip_and_handler(IRQ_IPI_SMP_FUNCTION, &nlm_cpu_intr,
+			 nlm_smp_function_ipi_handler);
+	irq_set_chip_and_handler(IRQ_IPI_SMP_RESCHEDULE, &nlm_cpu_intr,
+			 nlm_smp_resched_ipi_handler);
+	nlm_irq_mask |=
+	    ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE));
+#endif
+
+#ifdef CONFIG_PCI
+	/*
+	 * For PCI interrupts, we need to ack the PIC controller too, overload
+	 * irq handler data to do this
+	 */
+	if (nlm_chip_is_xls()) {
+		if (nlm_chip_is_xls_b()) {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK2_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ,
+							xls_pcie_ack_b);
+		} else {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK2_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK3_IRQ, xls_pcie_ack);
+		}
+	} else {
+		/* XLR PCI controller ACK */
+		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack);
+	}
+#endif
+	/* unmask all PIC related interrupts. If no handler is installed by the
+	 * drivers, it'll just ack the interrupt and return
+	 */
+	for (i = PIC_IRT_FIRST_IRQ; i <= PIC_IRT_LAST_IRQ; i++)
+		nlm_irq_mask |= (1ULL << i);
+
+	nlm_irq_mask |= (1ULL << IRQ_TIMER);
+}
+
+void __init arch_init_irq(void)
+{
+	/* Initialize the irq descriptors */
+	init_xlr_irqs();
+	write_c0_eimr(nlm_irq_mask);
+}
+
+void __cpuinit nlm_smp_irq_init(void)
+{
+	/* set interrupt mask for non-zero cpus */
+	write_c0_eimr(nlm_irq_mask);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	uint64_t eirr;
+	int i;
+
+	eirr = read_c0_eirr() & read_c0_eimr();
+	if (!eirr)
+		return;
+
+	/* no need to ack EIRR here, writing compare clears the interrupt */
+	if (eirr & (1ULL << IRQ_TIMER)) {
+		do_IRQ(IRQ_TIMER);
+		return;
+	}
+
+	/* find the highest pending bit; could be optimized with dclz */
+	for (i = 63; i != -1; i--) {
+		if (eirr & (1ULL << i))
+			break;
+	}
+	if (i == -1) {
+		pr_err("no interrupt !!\n");
+		return;
+	}
+
+	/* Ack eirr */
+	write_c0_eirr(1ULL << i);
+
+	do_IRQ(i);
+}
diff --git a/arch/mips/netlogic/xlr/platform.c b/arch/mips/netlogic/xlr/platform.c
new file mode 100644
index 000000000000..609ec2534642
--- /dev/null
+++ b/arch/mips/netlogic/xlr/platform.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2011, Netlogic Microsystems.
+ * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset)
+{
+	nlm_reg_t *mmio;
+	unsigned int value;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+	value = netlogic_read_reg(mmio, 0);
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	return value;
+}
+
+void nlm_xlr_uart_out(struct uart_port *p, int offset, int value)
+{
+	nlm_reg_t *mmio;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	netlogic_write_reg(mmio, 0, value);
+}
+
+#define PORT(_irq)					\
+	{						\
+		.irq		= _irq,			\
+		.regshift	= 2,			\
+		.iotype		= UPIO_MEM32,		\
+		.flags		= (UPF_SKIP_TEST |	\
+			 UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF),\
+		.uartclk	= PIC_CLKS_PER_SEC,	\
+		.type		= PORT_16550A,		\
+		.serial_in	= nlm_xlr_uart_in,	\
+		.serial_out	= nlm_xlr_uart_out,	\
+	}
+
+static struct plat_serial8250_port xlr_uart_data[] = {
+	PORT(PIC_UART_0_IRQ),
+	PORT(PIC_UART_1_IRQ),
+	{},
+};
+
+static struct platform_device uart_device = {
+	.name		= "serial8250",
+	.id		= PLAT8250_DEV_PLATFORM,
+	.dev = {
+		.platform_data = xlr_uart_data,
+	},
+};
+
+static int __init nlm_uart_init(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	xlr_uart_data[0].membase = (void __iomem *)mmio;
+	xlr_uart_data[0].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_1_OFFSET);
+	xlr_uart_data[1].membase = (void __iomem *)mmio;
+	xlr_uart_data[1].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	return platform_device_register(&uart_device);
+}
+
+arch_initcall(nlm_uart_init);
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
new file mode 100644
index 000000000000..482802569e74
--- /dev/null
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/serial_8250.h>
+#include <linux/pm.h>
+
+#include <asm/reboot.h>
+#include <asm/time.h>
+#include <asm/bootinfo.h>
+#include <asm/smp-ops.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+#include <asm/netlogic/xlr/xlr.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/gpio.h>
+
+unsigned long netlogic_io_base = (unsigned long)(DEFAULT_NETLOGIC_IO_BASE);
+unsigned long nlm_common_ebase = 0x0;
+struct psb_info nlm_prom_info;
+
+static void nlm_early_serial_setup(void)
+{
+	struct uart_port s;
+	nlm_reg_t *uart_base;
+
+	uart_base = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	memset(&s, 0, sizeof(s));
+	s.flags		= ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
+	s.iotype	= UPIO_MEM32;
+	s.regshift	= 2;
+	s.irq		= PIC_UART_0_IRQ;
+	s.uartclk	= PIC_CLKS_PER_SEC;
+	s.serial_in	= nlm_xlr_uart_in;
+	s.serial_out	= nlm_xlr_uart_out;
+	s.mapbase	= (unsigned long)uart_base;
+	s.membase	= (unsigned char __iomem *)uart_base;
+	early_serial_setup(&s);
+}
+
+static void nlm_linux_exit(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_GPIO_OFFSET);
+	/* trigger a chip reset by writing 1 to GPIO_SWRESET_REG */
+	netlogic_write_reg(mmio, NETLOGIC_GPIO_SWRESET_REG, 1);
+	for ( ; ; )
+		cpu_wait();
+}
+
+void __init plat_mem_setup(void)
+{
+	panic_timeout	= 5;
+	_machine_restart = (void (*)(char *))nlm_linux_exit;
+	_machine_halt	= nlm_linux_exit;
+	pm_power_off	= nlm_linux_exit;
+}
+
+const char *get_system_type(void)
+{
+	return "Netlogic XLR/XLS Series";
+}
+
+void __init prom_free_prom_memory(void)
+{
+	/* Nothing yet */
+}
+
+static void build_arcs_cmdline(int *argv)
+{
+	int i, remain, len;
+	char *arg;
+
+	remain = sizeof(arcs_cmdline) - 1;
+	arcs_cmdline[0] = '\0';
+	for (i = 0; argv[i] != 0; i++) {
+		arg = (char *)(long)argv[i];
+		len = strlen(arg);
+		if (len + 1 > remain)
+			break;
+		strcat(arcs_cmdline, arg);
+		strcat(arcs_cmdline, " ");
+		remain -=  len + 1;
+	}
+
+	/* Add the default options here */
+	if ((strstr(arcs_cmdline, "console=")) == NULL) {
+		arg = "console=ttyS0,38400 ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#ifdef CONFIG_BLK_DEV_INITRD
+	if ((strstr(arcs_cmdline, "rdinit=")) == NULL) {
+		arg = "rdinit=/sbin/init ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#endif
+	return;
+fail:
+	panic("Cannot add %s, command line too big!", arg);
+}
+
+static void prom_add_memory(void)
+{
+	struct nlm_boot_mem_map *bootm;
+	u64 start, size;
+	u64 pref_backup = 512;  /* avoid pref walking beyond end */
+	int i;
+
+	bootm = (void *)(long)nlm_prom_info.psb_mem_map;
+	for (i = 0; i < bootm->nr_map; i++) {
+		if (bootm->map[i].type != BOOT_MEM_RAM)
+			continue;
+		start = bootm->map[i].addr;
+		size   = bootm->map[i].size;
+
+		/* Work around for using bootloader mem */
+		if (i == 0 && start == 0 && size == 0x0c000000)
+			size = 0x0ff00000;
+
+		add_memory_region(start, size - pref_backup, BOOT_MEM_RAM);
+	}
+}
+
+void __init prom_init(void)
+{
+	int *argv, *envp;		/* passed as 32 bit ptrs */
+	struct psb_info *prom_infop;
+
+	/* truncate to 32 bit and sign extend all args */
+	argv = (int *)(long)(int)fw_arg1;
+	envp = (int *)(long)(int)fw_arg2;
+	prom_infop = (struct psb_info *)(long)(int)fw_arg3;
+
+	nlm_prom_info = *prom_infop;
+
+	nlm_early_serial_setup();
+	build_arcs_cmdline(argv);
+	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	prom_add_memory();
+
+#ifdef CONFIG_SMP
+	nlm_wakeup_secondary_cpus(nlm_prom_info.online_cpu_map);
+	register_smp_ops(&nlm_smp_ops);
+#endif
+}
diff --git a/arch/mips/netlogic/xlr/smp.c b/arch/mips/netlogic/xlr/smp.c
new file mode 100644
index 000000000000..b495a7f1433b
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smp.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+
+#include <asm/mmu_context.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+void core_send_ipi(int logical_cpu, unsigned int action)
+{
+	int cpu = cpu_logical_map(logical_cpu);	/* id the IPI word is built from */
+	u32 tid = cpu & 0x3;			/* low two bits of that id */
+	u32 pid = (cpu >> 2) & 0x07;		/* next three bits of that id */
+	u32 ipi = (tid << 16) | (pid << 20);	/* tid at bit 16, pid at bit 20 */
+
+	if (action & SMP_CALL_FUNCTION)		/* call-function wins if both are set */
+		ipi |= IRQ_IPI_SMP_FUNCTION;
+	else if (action & SMP_RESCHEDULE_YOURSELF)
+		ipi |= IRQ_IPI_SMP_RESCHEDULE;
+	else
+		return;				/* no known action bit: send nothing */
+
+	pic_send_ipi(ipi);
+}
+
+void nlm_send_ipi_single(int cpu, unsigned int action)
+{
+	core_send_ipi(cpu, action);
+}
+
+/* Deliver @action to every CPU set in @mask, one IPI per CPU */
+void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+	int target;
+
+	for_each_cpu(target, mask)
+		core_send_ipi(target, action);
+}
+
+/* IRQ_IPI_SMP_FUNCTION Handler */
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	smp_call_function_interrupt();
+}
+
+/* IRQ_IPI_SMP_RESCHEDULE handler: let the scheduler process the IPI */
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	scheduler_ipi();
+}
+
+void nlm_common_ipi_handler(int irq, struct pt_regs *regs)
+{
+	if (irq == IRQ_IPI_SMP_FUNCTION) {
+		smp_call_function_interrupt();
+	} else {
+		/* Reschedule IPI: hand it to the scheduler's IPI callback */
+		scheduler_ipi();
+	}
+}
+
+/*
+ * Called before going into mips code, early cpu init
+ */
+void nlm_early_init_secondary(void)
+{
+	write_c0_ebase((uint32_t)nlm_common_ebase);
+	/* TLB partition here later */
+}
+
+/*
+ * Code to run on secondary just after probing the CPU
+ */
+static void __cpuinit nlm_init_secondary(void)
+{
+	nlm_smp_irq_init();
+}
+
+void nlm_smp_finish(void)
+{
+#ifdef notyet
+	nlm_common_msgring_cpu_init();
+#endif
+}
+
+void nlm_cpus_done(void)
+{
+}
+
+/*
+ * Boot all other cpus in the system, initialize them, and bring them into
+ * the boot function
+ */
+int nlm_cpu_unblock[NR_CPUS];
+int nlm_cpu_ready[NR_CPUS];
+unsigned long nlm_next_gp;
+unsigned long nlm_next_sp;
+cpumask_t phys_cpu_present_map;
+
+void nlm_boot_secondary(int logical_cpu, struct task_struct *idle)
+{
+	unsigned long gp = (unsigned long)task_thread_info(idle);
+	unsigned long sp = (unsigned long)__KSTK_TOS(idle);
+	int cpu = cpu_logical_map(logical_cpu);
+
+	nlm_next_sp = sp;	/* loaded into sp by prom_pre_boot_secondary_cpus */
+	nlm_next_gp = gp;	/* loaded into gp by prom_pre_boot_secondary_cpus */
+
+	/* barrier: publish nlm_next_sp/gp before setting the unblock flag */
+	__sync();
+	nlm_cpu_unblock[cpu] = 1;	/* the waiting CPU spins until this is non-zero */
+}
+
+/* Build the CPU number maps from the boot CPU and the nlm_cpu_ready flags */
+void __init nlm_smp_setup(void)
+{
+	unsigned int boot_cpu;
+	int num_cpus, i;
+
+	boot_cpu = hard_smp_processor_id();
+	cpus_clear(phys_cpu_present_map);
+	cpu_set(boot_cpu, phys_cpu_present_map);
+	__cpu_number_map[boot_cpu] = 0;
+	__cpu_logical_map[0] = boot_cpu;
+	cpu_set(0, cpu_possible_map);
+
+	num_cpus = 1;	/* the boot CPU already occupies logical slot 0 */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (nlm_cpu_ready[i]) {
+			cpu_set(i, phys_cpu_present_map);
+			__cpu_number_map[i] = num_cpus;
+			__cpu_logical_map[num_cpus] = i;
+			cpu_set(num_cpus, cpu_possible_map);
+			++num_cpus;
+		}
+	}
+
+	pr_info("Phys CPU present map: %lx, possible map %lx\n",
+		(unsigned long)phys_cpu_present_map.bits[0],
+		(unsigned long)cpu_possible_map.bits[0]);
+	/* num_cpus includes the boot CPU; report only the secondaries */
+	pr_info("Detected %i Slave CPU(s)\n", num_cpus - 1);
+}
+
+void nlm_prepare_cpus(unsigned int max_cpus)
+{
+}
+
+struct plat_smp_ops nlm_smp_ops = {
+	.send_ipi_single	= nlm_send_ipi_single,
+	.send_ipi_mask		= nlm_send_ipi_mask,
+	.init_secondary		= nlm_init_secondary,
+	.smp_finish		= nlm_smp_finish,
+	.cpus_done		= nlm_cpus_done,
+	.boot_secondary		= nlm_boot_secondary,
+	.smp_setup		= nlm_smp_setup,
+	.prepare_cpus		= nlm_prepare_cpus,
+};
+
+unsigned long secondary_entry_point;
+
+/* Copy the boot stub to the reset vector, then IPI each CPU in wakeup_mask */
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask)
+{
+	unsigned int tid, pid, ipi, i, boot_cpu;
+	void *reset_vec;
+
+	secondary_entry_point = (unsigned long)prom_pre_boot_secondary_cpus;
+	reset_vec = (void *)CKSEG1ADDR(0x1fc00000);
+	memcpy(reset_vec, nlm_boot_smp_nmi, 0x80);
+	boot_cpu = hard_smp_processor_id();
+
+	/* wakeup_mask is 32 bits wide: never shift past bit 31 */
+	for (i = 0; i < NR_CPUS && i < 32; i++) {
+		if (i == boot_cpu || !(wakeup_mask & (1u << i)))
+			continue;
+		tid = i & 0x3;
+		pid = (i >> 2) & 0x7;
+		ipi = (tid << 16) | (pid << 20) | (1 << 8);
+		pic_send_ipi(ipi);
+	}
+
+	return 0;
+}
diff --git a/arch/mips/netlogic/xlr/smpboot.S b/arch/mips/netlogic/xlr/smpboot.S
new file mode 100644
index 000000000000..b8e074402c99
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smpboot.S
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+
+
+/* Don't jump to linux function from Bootloader stack. Change it
+ * here. Kernel might allocate bootloader memory before all the CPUs are
+ * brought up (eg: Inode cache region) and we better don't overwrite this
+ * memory
+ */
+NESTED(prom_pre_boot_secondary_cpus, 16, sp)
+	.set	mips64
+	mfc0	t0, $15, 1	# read ebase
+	andi	t0, 0x1f	# t0 has the processor_id()
+	sll	t0, 2		# offset in cpu array
+
+	PTR_LA	t1, nlm_cpu_ready # mark CPU ready
+	PTR_ADDU t1, t0
+	li	t2, 1
+	sw	t2, 0(t1)
+
+	PTR_LA	t1, nlm_cpu_unblock
+	PTR_ADDU t1, t0
+1:	lw	t2, 0(t1)	# wait till unblocked
+	beqz	t2, 1b
+	nop
+
+	PTR_LA	t1, nlm_next_sp
+	PTR_L	sp, 0(t1)
+	PTR_LA	t1, nlm_next_gp
+	PTR_L	gp, 0(t1)
+
+	PTR_LA	t0, nlm_early_init_secondary
+	jalr	t0
+	nop
+
+	PTR_LA	t0, smp_bootstrap
+	jr	t0
+	nop
+END(prom_pre_boot_secondary_cpus)
+
+NESTED(nlm_boot_smp_nmi, 0, sp)
+	.set push
+	.set noat
+	.set mips64
+	.set noreorder
+
+	/* Clear the  NMI and BEV bits */
+	MFC0	k0, CP0_STATUS
+	li 	k1, 0xffb7ffff
+	and	k0, k0, k1
+	MTC0	k0, CP0_STATUS
+
+	PTR_LA  k1, secondary_entry_point
+	PTR_L	k0, 0(k1)
+	jr	k0
+	nop
+	.set pop
+END(nlm_boot_smp_nmi)
diff --git a/arch/mips/netlogic/xlr/time.c b/arch/mips/netlogic/xlr/time.c
new file mode 100644
index 000000000000..0d81b262593c
--- /dev/null
+++ b/arch/mips/netlogic/xlr/time.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+
+#include <asm/time.h>
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return IRQ_TIMER;
+}
+
+void __init plat_time_init(void)
+{
+	mips_hpt_frequency = nlm_prom_info.cpu_frequency; /* from firmware info */
+	pr_info("MIPS counter frequency [%lu]\n",
+		(unsigned long)mips_hpt_frequency);
+}
diff --git a/arch/mips/netlogic/xlr/xlr_console.c b/arch/mips/netlogic/xlr/xlr_console.c
new file mode 100644
index 000000000000..759df0692201
--- /dev/null
+++ b/arch/mips/netlogic/xlr/xlr_console.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <asm/netlogic/xlr/iomap.h>
+
+void prom_putchar(char c)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);	/* UART 0 register block */
+	while (netlogic_read_reg(mmio, 0x5) == 0)	/* spin while register 5 reads zero */
+		;	/* NOTE(review): if reg 5 is a 16550-style LSR, testing only its TX-empty bit would be stricter - confirm */
+	netlogic_write_reg(mmio, 0x0, c);	/* write the character to register 0 */
+}
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index c9209ca6c8e7..4df879937446 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_SB1250)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM112X)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM1x80)	+= pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)		+= fixup-sni.o ops-sni.o
+obj-$(CONFIG_SOC_XWAY)		+= pci-lantiq.o ops-lantiq.o
 obj-$(CONFIG_TANBAC_TB0219)	+= fixup-tb0219.o
 obj-$(CONFIG_TANBAC_TB0226)	+= fixup-tb0226.o
 obj-$(CONFIG_TANBAC_TB0287)	+= fixup-tb0287.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_ZAO_CAPCELLA)	+= fixup-capcella.o
 obj-$(CONFIG_WR_PPMC)		+= fixup-wrppmc.o
 obj-$(CONFIG_MIKROTIK_RB532)	+= pci-rc32434.o ops-rc32434.o fixup-rc32434.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= pci-octeon.o pcie-octeon.o
+obj-$(CONFIG_NLM_XLR)		+= pci-xlr.o
 
 ifdef CONFIG_PCI_MSI
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= msi-octeon.o
diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c
new file mode 100644
index 000000000000..1f2afb55cc71
--- /dev/null
+++ b/arch/mips/pci/ops-lantiq.c
@@ -0,0 +1,116 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/addrspace.h>
+#include <linux/vmalloc.h>
+
+#include <lantiq_soc.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BUSNUM_SHF 16
+#define LTQ_PCI_CFG_DEVNUM_SHF 11
+#define LTQ_PCI_CFG_FUNNUM_SHF 8
+
+#define PCI_ACCESS_READ  0
+#define PCI_ACCESS_WRITE 1
+
+static int ltq_pci_config_access(unsigned char access_type, struct pci_bus *bus,
+	unsigned int devfn, unsigned int where, u32 *data)
+{
+	unsigned long cfg_base;
+	unsigned long flags;
+	u32 temp;
+
+	/* we support slot from 0 to 15 dev_fn & 0x68 (AD29) is the
+	   SoC itself */
+	if ((bus->number != 0) || ((devfn & 0xf8) > 0x78)
+		|| ((devfn & 0xf8) == 0) || ((devfn & 0xf8) == 0x68))
+		return 1;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (bus->number << LTQ_PCI_CFG_BUSNUM_SHF) | (devfn <<
+			LTQ_PCI_CFG_FUNNUM_SHF) | (where & ~0x3);
+
+	/* Perform access */
+	if (access_type == PCI_ACCESS_WRITE) {
+		ltq_w32(swab32(*data), ((u32 *)cfg_base));
+	} else {
+		*data = ltq_r32(((u32 *)(cfg_base)));
+		*data = swab32(*data);
+	}
+	wmb();
+
+	/* clean possible Master abort */
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x0 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	temp = ltq_r32(((u32 *)(cfg_base)));
+	temp = swab32(temp);
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x68 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	ltq_w32(temp, ((u32 *)cfg_base));
+
+	spin_unlock_irqrestore(&ebu_lock, flags);
+
+	if (((*data) == 0xffffffff) && (access_type == PCI_ACCESS_READ))
+		return 1;
+
+	return 0;
+}
+
+int ltq_pci_read_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	u32 data = 0;
+
+	if (ltq_pci_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		*val = (data >> ((where & 3) << 3)) & 0xff;
+	else if (size == 2)
+		*val = (data >> ((where & 3) << 3)) & 0xffff;
+	else
+		*val = data;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+int ltq_pci_write_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 val)
+{
+	u32 data = 0;
+
+	if (size == 4) {
+		data = val;
+	} else {
+		if (ltq_pci_config_access(PCI_ACCESS_READ, bus,
+				devfn, where, &data))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		if (size == 1)
+			data = (data & ~(0xff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+		else if (size == 2)
+			data = (data & ~(0xffff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+	}
+
+	if (ltq_pci_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
new file mode 100644
index 000000000000..603d7493e966
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.c
@@ -0,0 +1,297 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/platform_device.h>
+
+#include <asm/pci.h>
+#include <asm/gpio.h>
+#include <asm/addrspace.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BASE		0x17000000
+#define LTQ_PCI_CFG_SIZE		0x00008000
+#define LTQ_PCI_MEM_BASE		0x18000000
+#define LTQ_PCI_MEM_SIZE		0x02000000
+#define LTQ_PCI_IO_BASE			0x1AE00000
+#define LTQ_PCI_IO_SIZE			0x00200000
+
+#define PCI_CR_FCI_ADDR_MAP0		0x00C0
+#define PCI_CR_FCI_ADDR_MAP1		0x00C4
+#define PCI_CR_FCI_ADDR_MAP2		0x00C8
+#define PCI_CR_FCI_ADDR_MAP3		0x00CC
+#define PCI_CR_FCI_ADDR_MAP4		0x00D0
+#define PCI_CR_FCI_ADDR_MAP5		0x00D4
+#define PCI_CR_FCI_ADDR_MAP6		0x00D8
+#define PCI_CR_FCI_ADDR_MAP7		0x00DC
+#define PCI_CR_CLK_CTRL			0x0000
+#define PCI_CR_PCI_MOD			0x0030
+#define PCI_CR_PC_ARB			0x0080
+#define PCI_CR_FCI_ADDR_MAP11hg		0x00E4
+#define PCI_CR_BAR11MASK		0x0044
+#define PCI_CR_BAR12MASK		0x0048
+#define PCI_CR_BAR13MASK		0x004C
+#define PCI_CS_BASE_ADDR1		0x0010
+#define PCI_CR_PCI_ADDR_MAP11		0x0064
+#define PCI_CR_FCI_BURST_LENGTH		0x00E8
+#define PCI_CR_PCI_EOI			0x002C
+#define PCI_CS_STS_CMD			0x0004
+
+#define PCI_MASTER0_REQ_MASK_2BITS	8
+#define PCI_MASTER1_REQ_MASK_2BITS	10
+#define PCI_MASTER2_REQ_MASK_2BITS	12
+#define INTERNAL_ARB_ENABLE_BIT		0
+
+#define LTQ_CGU_IFCCR		0x0018
+#define LTQ_CGU_PCICR		0x0034
+
+#define ltq_pci_w32(x, y)	ltq_w32((x), ltq_pci_membase + (y))
+#define ltq_pci_r32(x)		ltq_r32(ltq_pci_membase + (x))
+
+#define ltq_pci_cfg_w32(x, y)	ltq_w32((x), ltq_pci_mapped_cfg + (y))
+#define ltq_pci_cfg_r32(x)	ltq_r32(ltq_pci_mapped_cfg + (x))
+
+struct ltq_pci_gpio_map {
+	int pin;
+	int alt0;
+	int alt1;
+	int dir;
+	char *name;
+};
+
+/* the pci core can make use of the following gpios */
+static struct ltq_pci_gpio_map ltq_pci_gpio_map[] = {
+	{ 0, 1, 0, 0, "pci-exin0" },
+	{ 1, 1, 0, 0, "pci-exin1" },
+	{ 2, 1, 0, 0, "pci-exin2" },
+	{ 39, 1, 0, 0, "pci-exin3" },
+	{ 10, 1, 0, 0, "pci-exin4" },
+	{ 9, 1, 0, 0, "pci-exin5" },
+	{ 30, 1, 0, 1, "pci-gnt1" },
+	{ 23, 1, 0, 1, "pci-gnt2" },
+	{ 19, 1, 0, 1, "pci-gnt3" },
+	{ 38, 1, 0, 1, "pci-gnt4" },
+	{ 29, 1, 0, 0, "pci-req1" },
+	{ 31, 1, 0, 0, "pci-req2" },
+	{ 3, 1, 0, 0, "pci-req3" },
+	{ 37, 1, 0, 0, "pci-req4" },
+};
+
+__iomem void *ltq_pci_mapped_cfg;
+static __iomem void *ltq_pci_membase;
+
+int (*ltqpci_plat_dev_init)(struct pci_dev *dev) = NULL;
+
+/* Since the PCI REQ pins can be reused for other functionality, make it
+   possible to exclude those from interpretation by the PCI controller */
+static int ltq_pci_req_mask = 0xf;
+
+static int *ltq_pci_irq_map;
+
+struct pci_ops ltq_pci_ops = {
+	.read	= ltq_pci_read_config_dword,
+	.write	= ltq_pci_write_config_dword
+};
+
+static struct resource pci_io_resource = {
+	.name	= "pci io space",
+	.start	= LTQ_PCI_IO_BASE,
+	.end	= LTQ_PCI_IO_BASE + LTQ_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource pci_mem_resource = {
+	.name	= "pci memory space",
+	.start	= LTQ_PCI_MEM_BASE,
+	.end	= LTQ_PCI_MEM_BASE + LTQ_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_controller ltq_pci_controller = {
+	.pci_ops	= &ltq_pci_ops,
+	.mem_resource	= &pci_mem_resource,
+	.mem_offset	= 0x00000000UL,
+	.io_resource	= &pci_io_resource,
+	.io_offset	= 0x00000000UL,
+};
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	if (ltqpci_plat_dev_init)
+		return ltqpci_plat_dev_init(dev);
+
+	return 0;
+}
+
+static u32 ltq_calc_bar11mask(void)
+{
+	u32 mem, bar11mask;
+
+	/* BAR11MASK value depends on available memory on system. */
+	mem = num_physpages * PAGE_SIZE;
+	bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8;
+
+	return bar11mask;
+}
+
+static void ltq_pci_setup_gpio(int gpio)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(ltq_pci_gpio_map); i++) {
+		if (gpio & (1 << i)) {
+			ltq_gpio_request(ltq_pci_gpio_map[i].pin,
+				ltq_pci_gpio_map[i].alt0,
+				ltq_pci_gpio_map[i].alt1,
+				ltq_pci_gpio_map[i].dir,
+				ltq_pci_gpio_map[i].name);
+		}
+	}
+	ltq_gpio_request(21, 0, 0, 1, "pci-reset");
+	ltq_pci_req_mask = (gpio >> PCI_REQ_SHIFT) & PCI_REQ_MASK;
+}
+
+static int __devinit ltq_pci_startup(struct ltq_pci_data *conf)
+{
+	u32 temp_buffer;
+
+	/* set clock to 33Mhz */
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR);
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR);
+
+	/* external or internal clock ? */
+	if (conf->clock) {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~(1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 30), LTQ_CGU_PCICR);
+	} else {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | (1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 31) | (1 << 30), LTQ_CGU_PCICR);
+	}
+
+	/* setup pci clock and gpios used by pci */
+	ltq_pci_setup_gpio(conf->gpio);
+
+	/* enable auto-switching between PCI and EBU */
+	ltq_pci_w32(0xa, PCI_CR_CLK_CTRL);
+
+	/* busy, i.e. configuration is not done, PCI access has to be retried */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) & ~(1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+	/* BUS Master/IO/MEM access */
+	ltq_pci_cfg_w32(ltq_pci_cfg_r32(PCI_CS_STS_CMD) | 7, PCI_CS_STS_CMD);
+
+	/* enable external 2 PCI masters */
+	temp_buffer = ltq_pci_r32(PCI_CR_PC_ARB);
+	temp_buffer &= (~(ltq_pci_req_mask << 16));
+	/* enable internal arbiter */
+	temp_buffer |= (1 << INTERNAL_ARB_ENABLE_BIT);
+	/* enable internal PCI master request */
+	temp_buffer &= (~(3 << PCI_MASTER0_REQ_MASK_2BITS));
+
+	/* enable EBU request */
+	temp_buffer &= (~(3 << PCI_MASTER1_REQ_MASK_2BITS));
+
+	/* enable all external masters request */
+	temp_buffer &= (~(3 << PCI_MASTER2_REQ_MASK_2BITS));
+	ltq_pci_w32(temp_buffer, PCI_CR_PC_ARB);
+	wmb();
+
+	/* setup BAR memory regions */
+	ltq_pci_w32(0x18000000, PCI_CR_FCI_ADDR_MAP0);
+	ltq_pci_w32(0x18400000, PCI_CR_FCI_ADDR_MAP1);
+	ltq_pci_w32(0x18800000, PCI_CR_FCI_ADDR_MAP2);
+	ltq_pci_w32(0x18c00000, PCI_CR_FCI_ADDR_MAP3);
+	ltq_pci_w32(0x19000000, PCI_CR_FCI_ADDR_MAP4);
+	ltq_pci_w32(0x19400000, PCI_CR_FCI_ADDR_MAP5);
+	ltq_pci_w32(0x19800000, PCI_CR_FCI_ADDR_MAP6);
+	ltq_pci_w32(0x19c00000, PCI_CR_FCI_ADDR_MAP7);
+	ltq_pci_w32(0x1ae00000, PCI_CR_FCI_ADDR_MAP11hg);
+	ltq_pci_w32(ltq_calc_bar11mask(), PCI_CR_BAR11MASK);
+	ltq_pci_w32(0, PCI_CR_PCI_ADDR_MAP11);
+	ltq_pci_w32(0, PCI_CS_BASE_ADDR1);
+	/* both TX and RX endian swap are enabled */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_EOI) | 3, PCI_CR_PCI_EOI);
+	wmb();
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR12MASK) | 0x80000000,
+		PCI_CR_BAR12MASK);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR13MASK) | 0x80000000,
+		PCI_CR_BAR13MASK);
+	/*use 8 dw burst length */
+	ltq_pci_w32(0x303, PCI_CR_FCI_BURST_LENGTH);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) | (1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+
+	/* setup irq line */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_CON) | 0xc, LTQ_EBU_PCC_CON);
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_IEN) | 0x10, LTQ_EBU_PCC_IEN);
+
+	/* toggle reset pin */
+	__gpio_set_value(21, 0);
+	wmb();
+	mdelay(1);
+	__gpio_set_value(21, 1);
+	return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (ltq_pci_irq_map[slot])
+		return ltq_pci_irq_map[slot];
+	printk(KERN_ERR "lq_pci: trying to map irq for unknown slot %d\n",
+		slot);
+
+	return 0;
+}
+
+static int __devinit ltq_pci_probe(struct platform_device *pdev)
+{
+	struct ltq_pci_data *ltq_pci_data = pdev->dev.platform_data;
+
+	/* Map the controller, config and IO windows, then bring the bus up */
+	pci_probe_only = 0;
+	ltq_pci_irq_map = ltq_pci_data->irq;
+	ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE);
+	ltq_pci_mapped_cfg =
+		ioremap_nocache(LTQ_PCI_CFG_BASE, LTQ_PCI_CFG_SIZE);
+	ltq_pci_controller.io_map_base =
+		(unsigned long)ioremap(LTQ_PCI_IO_BASE, LTQ_PCI_IO_SIZE);
+	ltq_pci_startup(ltq_pci_data);
+	register_pci_controller(&ltq_pci_controller);
+	return 0;
+}
+
+static struct platform_driver
+ltq_pci_driver = {
+	.probe = ltq_pci_probe,
+	.driver = {
+		.name = "ltq_pci",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init pcibios_init(void)
+{
+	int ret = platform_driver_register(&ltq_pci_driver);
+	if (ret)	/* registration failed: log as an error and pass it on */
+		printk(KERN_ERR "ltq_pci: Error registering platform driver!\n");
+	return ret;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/pci/pci-lantiq.h b/arch/mips/pci/pci-lantiq.h
new file mode 100644
index 000000000000..66bf6cd6be3c
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PCI_H__
+#define _LTQ_PCI_H__
+
+extern __iomem void *ltq_pci_mapped_cfg;
+extern int ltq_pci_read_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 *val);
+extern int ltq_pci_write_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 val);
+
+#endif
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
new file mode 100644
index 000000000000..38fece16c435
--- /dev/null
+++ b/arch/mips/pci/pci-xlr.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+static void *pci_config_base;
+
+#define	pci_cfg_addr(bus, devfn, off) (((bus) << 16) | ((devfn) << 8) | (off))
+
+/* PCI ops */
+/* Read the aligned config dword that contains offset @where */
+static inline u32 pci_cfg_read_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where)
+{
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	/* read direction: convert from little-endian to CPU byte order */
+	return le32_to_cpu(*cfgaddr);
+}
+
+static inline void pci_cfg_write_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where, u32 data)
+{
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	*cfgaddr = cpu_to_le32(data);
+}
+
+/*
+ * Config-space read: fetch the aligned dword holding @where and return the
+ * addressed byte or halfword (or the whole dword) through @val.  Misaligned
+ * 2- and 4-byte accesses are rejected.
+ */
+static int nlm_pcibios_read(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	const unsigned int lane_shift = (where & 3) << 3;
+	u32 dword;
+
+	if ((size == 2 && (where & 1)) || (size == 4 && (where & 3)))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	dword = pci_cfg_read_32bit(bus, devfn, where);
+	if (size == 1 || size == 2)
+		dword = (dword >> lane_shift) & (size == 1 ? 0xff : 0xffff);
+	*val = dword;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+/* Write @size bytes of @val at config offset @where (1, 2 or 4 bytes) */
+static int nlm_pcibios_write(struct pci_bus *bus, unsigned int devfn,
+		int where, int size, u32 val)
+{
+	unsigned int shift = (where & 3) << 3;
+	u32 data, mask;
+
+	if ((size == 2) && (where & 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (size == 1 || size == 2) {
+		/* sub-dword: read-modify-write only the addressed lanes */
+		mask = (size == 1 ? 0xffu : 0xffffu) << shift;
+		data = pci_cfg_read_32bit(bus, devfn, where);
+		data = (data & ~mask) | ((val << shift) & mask);
+	} else {
+		/* full dword: nothing to preserve, so skip the read */
+		data = val;
+	}
+	pci_cfg_write_32bit(bus, devfn, where, data);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops nlm_pci_ops = {
+	.read  = nlm_pcibios_read,
+	.write = nlm_pcibios_write
+};
+
+static struct resource nlm_pci_mem_resource = {
+	.name           = "XLR PCI MEM",
+	.start          = 0xd0000000UL,	/* 256MB PCI mem @ 0xd000_0000 */
+	.end            = 0xdfffffffUL,
+	.flags          = IORESOURCE_MEM,
+};
+
+static struct resource nlm_pci_io_resource = {
+	.name           = "XLR IO MEM",
+	.start          = 0x10000000UL,	/* 16MB PCI IO @ 0x1000_0000 */
+	.end            = 0x100fffffUL,	/* NOTE(review): start..end spans 1MB, not the 16MB stated above - confirm */
+	.flags          = IORESOURCE_IO,
+};
+
+struct pci_controller nlm_pci_controller = {
+	.index          = 0,
+	.pci_ops        = &nlm_pci_ops,
+	.mem_resource   = &nlm_pci_mem_resource,
+	.mem_offset     = 0x00000000UL,
+	.io_resource    = &nlm_pci_io_resource,
+	.io_offset      = 0x00000000UL,
+};
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (!nlm_chip_is_xls())
+		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/
+
+	/*
+	 * For XLS PCIe, there is an IRQ per Link, find out which
+	 * link the device is on to assign interrupts
+	*/
+	if (dev->bus->self == NULL)
+		return 0;
+
+	switch	(dev->bus->self->devfn) {
+	case 0x0:
+		return PIC_PCIE_LINK0_IRQ;
+	case 0x8:
+		return PIC_PCIE_LINK1_IRQ;
+	case 0x10:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK2_IRQ;
+		else
+			return PIC_PCIE_LINK2_IRQ;
+	case 0x18:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK3_IRQ;
+		else
+			return PIC_PCIE_LINK3_IRQ;
+	}
+	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn);
+	return 0;
+}
+
+/* Do platform specific device initialization at pci_enable_device() time */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
+
+static int __init pcibios_init(void)
+{
+	/* PSB assigns PCI resources */
+	pci_probe_only = 1;
+	pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20);
+
+	/* Extend IO port for memory mapped io */
+	ioport_resource.start =  0;
+	ioport_resource.end   = ~0;
+
+	set_io_port_base(CKSEG1);
+	nlm_pci_controller.io_map_base = CKSEG1;
+
+	pr_info("Registering XLR/XLS PCIX/PCIE Controller.\n");
+	register_pci_controller(&nlm_pci_controller);
+
+	return 0;
+}
+
+arch_initcall(pcibios_init);
+
+struct pci_fixup pcibios_fixups[] = {
+	{0}
+};
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
index f9b9dcdfa9dd..98fd0099d964 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
@@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d,
 
 static struct irq_chip msp_per_irq_controller = {
 	.name = "MSP_PER",
-	.irq_enable = unmask_per_irq.
+	.irq_enable = unmask_per_irq,
 	.irq_disable = mask_per_irq,
 	.irq_ack = msp_per_irq_ack,
 #ifdef CONFIG_SMP
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index efc9e889b349..2608752898c0 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 
 	case 1:
@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 	}
 }
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index dbb5c7b4b70f..f8a751c03282 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume)
 0:
 	PTR_L t1, PBE_ADDRESS(t0)   /* source */
 	PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */
-	PTR_ADDIU t3, t1, PAGE_SIZE
+	PTR_ADDU t3, t1, PAGE_SIZE
 1:
 	REG_L t8, (t1)
 	REG_S t8, (t2)
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index 37de05d595e7..6c47dfeb7be3 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void)
 	struct resource *r;
 
 	r = rb532_gpio_reg0_res;
-	rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start);
+	rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
 
 	if (!rb532_gpio_chip->regbase) {
 		printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index deddbf0ebe5c..698904daf901 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -132,7 +132,7 @@ static struct platform_device eth1_device = {
  */
 static int __init sgiseeq_devinit(void)
 {
-	unsigned int tmp;
+	unsigned int pbdma __maybe_unused;
 	int res, i;
 
 	eth0_pd.hpc = hpc3c0;
@@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void)
 
 	/* Second HPC is missing? */
 	if (ip22_is_fullhouse() ||
-	    get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1]))
+	    get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1]))
 		return 0;
 
 	sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 |
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 603fc91c1030..1a94c9894188 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -32,7 +32,7 @@
 static unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	u8 msb, lsb;
+	u8 msb;
 
 	/* Start the counter. */
 	sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL |
@@ -46,7 +46,7 @@ static unsigned long dosample(void)
 	/* Latch and spin until top byte of counter2 is zero */
 	do {
 		writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword);
-		lsb = readb(&sgint->tcnt2);
+		(void) readb(&sgint->tcnt2);
 		msb = readb(&sgint->tcnt2);
 		ct1 = read_c0_count();
 	} while (msb);
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index a1fa4abb3f6a..cd0d5b06cd83 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 			  unsigned long xtalk_addr, size_t size)
 {
 	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
-	volatile hubreg_t junk;
 	unsigned i;
 
 	/* use small-window mapping if possible */
@@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 		 * after we write it.
 		 */
 		IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr);
-		junk = HUB_L(IIO_ITTE_GET(nasid, i));
+		(void) HUB_L(IIO_ITTE_GET(nasid, i));
 
 		return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE);
 	}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 0a04603d577c..b18b04e48577 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
 #ifdef CONFIG_SMP
 	if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
 		smp_call_function_interrupt();
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index c3d30a88daf3..1d1919a44e88 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -54,11 +54,8 @@ void __init setup_replication_mask(void)
 
 static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid)
 {
-	cnodeid_t client_cnode;
 	kern_vars_t *kvp;
 
-	client_cnode = NASID_TO_COMPACT_NODEID(client_nasid);
-
 	kvp = &hub_data(client_nasid)->kern_vars;
 
 	KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp;
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index a152538d3c97..ef74f3267f91 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -66,18 +66,7 @@ static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
 static void rt_set_mode(enum clock_event_mode mode,
 		struct clock_event_device *evt)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* The only mode supported */
-		break;
-
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do  */
-		break;
-	}
+	/* Nothing to do ...  */
 }
 
 int rt_timer_irq;
@@ -174,8 +163,7 @@ static void __init hub_rt_clocksource_init(void)
 {
 	struct clocksource *cs = &hub_rt_clocksource;
 
-	clocksource_set_clock(cs, CYCLES_PER_SEC);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, CYCLES_PER_SEC);
 }
 
 void __init plat_time_init(void)
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 47b347c992ea..d667875be564 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	__raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c00a5cb1128d..38e7f6bd7922 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index c76151b56568..0904d4d30cb3 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void)
 static __init unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	volatile u8 msb, lsb;
+	volatile u8 msb;
 
 	/* Start the counter. */
 	outb_p(0x34, 0x43);
@@ -108,7 +108,7 @@ static __init unsigned long dosample(void)
 	/* Latch and spin until top byte of counter0 is zero */
 	do {
 		outb(0x00, 0x43);
-		lsb = inb(0x40);
+		(void) inb(0x40);
 		msb = inb(0x40);
 		ct1 = read_c0_count();
 	} while (msb);
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 226c826a2194..83fb27912231 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -494,14 +494,11 @@ void smp_send_stop(void)
  * @irq: The interrupt number.
  * @dev_id: The device ID.
  *
- * We need do nothing here, since the scheduling will be effected on our way
- * back through entry.S.
- *
  * Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
  */
 static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
 {
-	/* do nothing */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 69d63d354ef0..828305f19cff 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
 				
 			case IPI_RESCHEDULE:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
-				/*
-				 * Reschedule callback.  Everything to be
-				 * done is done by the interrupt return path.
-				 */
+				scheduler_ipi();
 				break;
 
 			case IPI_CALL_FUNC:
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 8f1e4efd143e..2d9a5c7c76f5 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -69,6 +69,9 @@ SECTIONS
 	/* End of text section */
 	_etext = .;
 
+	/* Start of data section */
+	_sdata = .;
+
 	RODATA
 
 	/* writeable */
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b7ed8d7a9b33..b1d126258dee 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -266,8 +266,10 @@ static void __init setup_bootmem(void)
 	}
 	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
-	for (i = 0; i < npmem_ranges; i++)
+	for (i = 0; i < npmem_ranges; i++) {
+		node_set_state(i, N_NORMAL_MEMORY);
 		node_set_online(i);
+	}
 #endif
 
 	/*
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b6ff882f695b..a3128ca0fe11 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION
 	default y if PMAC_APM_EMU
 	bool
 
+config EPAPR_BOOT
+	bool
+	help
+	  Used to allow a board to specify it wants an ePAPR compliant wrapper.
+	default n
+
 config DEFAULT_UIMAGE
 	bool
 	help
@@ -209,7 +215,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
-		   PPC_85xx || PPC_86xx || PPC_PSERIES || 44x || 40x
+		   (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x
 
 config PPC_DCR_NATIVE
 	bool
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 2d38a50e66ba..a597dd77b903 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO
 	  Select this to enable early debugging for Nintendo GameCube/Wii
 	  consoles via an external USB Gecko adapter.
 
+config PPC_EARLY_DEBUG_WSP
+	bool "Early debugging via WSP's internal UART"
+	depends on PPC_WSP
+	select PPC_UDBG_16550
+
 endchoice
 
 config PPC_EARLY_DEBUG_44x_PHYSLOW
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 89178164af5e..c26200b40a47 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
 		cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
 		fsl-soc.c mpc8xx.c pq2.c ugecon.c
 src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
-		cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \
+		cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \
+		prpmc2800.c \
 		ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
 		cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
 		cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
@@ -127,7 +128,7 @@ quiet_cmd_bootas = BOOTAS  $@
       cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
@@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY)		+= dtbImage.holly
 image-$(CONFIG_PPC_PRPMC2800)		+= dtbImage.prpmc2800
 image-$(CONFIG_PPC_ISERIES)		+= zImage.iseries
 image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
+image-$(CONFIG_EPAPR_BOOT)		+= zImage.epapr
 
 #
 # Targets which embed a device tree blob
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index f1c4dfc635be..0f7428a37efb 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -6,16 +6,28 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ * NOTE: this code runs in 32 bit mode, is position-independent,
+ * and is packaged as ELF32.
  */
 
 #include "ppc_asm.h"
 
 	.text
-	/* a procedure descriptor used when booting this as a COFF file */
+	/* A procedure descriptor used when booting this as a COFF file.
+	 * When making COFF, this comes first in the link and we're
+	 * linked at 0x500000.
+	 */
 	.globl	_zimage_start_opd
 _zimage_start_opd:
-	.long	_zimage_start, 0, 0, 0
+	.long	0x500000, 0, 0, 0
+
+p_start:	.long	_start
+p_etext:	.long	_etext
+p_bss_start:	.long	__bss_start
+p_end:		.long	_end
+
+	.weak	_platform_stack_top
+p_pstack:	.long	_platform_stack_top
 
 	.weak	_zimage_start
 	.globl	_zimage_start
@@ -24,37 +36,65 @@ _zimage_start:
 _zimage_start_lib:
 	/* Work out the offset between the address we were linked at
 	   and the address where we're running. */
-	bl	1f
-1:	mflr	r0
-	lis	r9,1b@ha
-	addi	r9,r9,1b@l
-	subf.	r0,r9,r0
-	beq	3f		/* if running at same address as linked */
+	bl	.+4
+p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+	/* grab the link address of the dynamic section in r11 */
+	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
+	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
+	cmpwi	r11,0
+	beq	3f		/* if not linked -pie */
+	/* get the runtime address of the dynamic section in r12 */
+	.weak	__dynamic_start
+	addis	r12,r10,(__dynamic_start-p_base)@ha
+	addi	r12,r12,(__dynamic_start-p_base)@l
+	subf	r11,r11,r12	/* runtime - linktime offset */
+
+	/* The dynamic section contains a series of tagged entries.
+	 * We need the RELA and RELACOUNT entries. */
+RELA = 7
+RELACOUNT = 0x6ffffff9
+	li	r9,0
+	li	r0,0
+9:	lwz	r8,0(r12)	/* get tag */
+	cmpwi	r8,0
+	beq	10f		/* end of list */
+	cmpwi	r8,RELA
+	bne	11f
+	lwz	r9,4(r12)	/* get RELA pointer in r9 */
+	b	12f
+11:	addis	r8,r8,(-RELACOUNT)@ha
+	cmpwi	r8,RELACOUNT@l
+	bne	12f
+	lwz	r0,4(r12)	/* get RELACOUNT value in r0 */
+12:	addi	r12,r12,8
+	b	9b
 
-	/* The .got2 section contains a list of addresses, so add
-	   the address offset onto each entry. */
-	lis	r9,__got2_start@ha
-	addi	r9,r9,__got2_start@l
-	lis	r8,__got2_end@ha
-	addi	r8,r8,__got2_end@l
-	subf.	r8,r9,r8
+	/* The relocation section contains a list of relocations.
+	 * We now do the R_PPC_RELATIVE ones, which point to words
+	 * which need to be initialized with addend + offset.
+	 * The R_PPC_RELATIVE ones come first and there are RELACOUNT
+	 * of them. */
+10:	/* skip relocation if we don't have both */
+	cmpwi	r0,0
 	beq	3f
-	srwi.	r8,r8,2
-	mtctr	r8
-	add	r9,r0,r9
-2:	lwz	r8,0(r9)
-	add	r8,r8,r0
-	stw	r8,0(r9)
-	addi	r9,r9,4
+	cmpwi	r9,0
+	beq	3f
+
+	add	r9,r9,r11	/* Relocate RELA pointer */
+	mtctr	r0
+2:	lbz	r0,4+3(r9)	/* ELF32_R_INFO(reloc->r_info) */
+	cmpwi	r0,22		/* R_PPC_RELATIVE */
+	bne	3f
+	lwz	r12,0(r9)	/* reloc->r_offset */
+	lwz	r0,8(r9)	/* reloc->r_addend */
+	add	r0,r0,r11
+	stwx	r0,r11,r12
+	addi	r9,r9,12
 	bdnz	2b
 
 	/* Do a cache flush for our text, in case the loader didn't */
-3:	lis	r9,_start@ha
-	addi	r9,r9,_start@l
-	add	r9,r0,r9
-	lis	r8,_etext@ha
-	addi	r8,r8,_etext@l
-	add	r8,r0,r8
+3:	lwz	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	lwz	r8,p_etext-p_base(r10)
 4:	dcbf	r0,r9
 	icbi	r0,r9
 	addi	r9,r9,0x20
@@ -64,27 +104,19 @@ _zimage_start_lib:
 	isync
 
 	/* Clear the BSS */
-	lis	r9,__bss_start@ha
-	addi	r9,r9,__bss_start@l
-	add	r9,r0,r9
-	lis	r8,_end@ha
-	addi	r8,r8,_end@l
-	add	r8,r0,r8
-	li	r10,0
-5:	stw	r10,0(r9)
+	lwz	r9,p_bss_start-p_base(r10)
+	lwz	r8,p_end-p_base(r10)
+	li	r0,0
+5:	stw	r0,0(r9)
 	addi	r9,r9,4
 	cmplw	cr0,r9,r8
 	blt	5b
 
 	/* Possibly set up a custom stack */
-.weak	_platform_stack_top
-	lis	r8,_platform_stack_top@ha
-	addi	r8,r8,_platform_stack_top@l
+	lwz	r8,p_pstack-p_base(r10)
 	cmpwi	r8,0
 	beq	6f
-	add	r8,r0,r8
 	lwz	r1,0(r8)
-	add	r1,r0,r1
 	li	r0,0
 	stwu	r0,-16(r1)	/* establish a stack frame */
 6:
diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
index e0668f877794..d6a8ae458137 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p1020si.dtsi"
+
 / {
-	model = "fsl,P1020";
+	model = "fsl,P1020RDB";
 	compatible = "fsl,P1020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		serial0 = &serial0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P1020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P1020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,88 +141,14 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p1020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p1020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p1020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -294,66 +196,7 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p1020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x40000>; // L2,256K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
-
 		mdio@24000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-mdio";
-			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -369,10 +212,6 @@
 		};
 
 		mdio@25000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-tbi";
-			reg = <0x25000 0x1000 0xb1030 0x4>;
 
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
@@ -381,97 +220,25 @@
 		};
 
 		enet0: ethernet@b0000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
-			};
 		};
 
 		enet1: ethernet@b1000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy0>;
 			tbi-handle = <&tbi0>;
 			phy-connection-type = "sgmii";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
-			};
 		};
 
 		enet2: ethernet@b2000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb2000 0x1000>;
-				interrupts = <31 2 32 2 33 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb6000 0x1000>;
-				interrupts = <25 2 26 2 27 2>;
-			};
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
@@ -481,82 +248,23 @@
 		   it enables USB2. OTOH, U-Boot does create a new node
 		   when there isn't any. So, just comment it out.
 		usb@23000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x23000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <46 0x2>;
 			phy_type = "ulpi";
 		};
 		*/
 
-		sdhci@2e000 {
-			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p1020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
 	};
 
 	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -573,18 +281,16 @@
 	};
 
 	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
new file mode 100644
index 000000000000..f0bf7f42f097
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
@@ -0,0 +1,213 @@
+/*
+ * P1020 RDB  Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb,
+ * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
+ *
+ * Note: pass "-b 0" to the dts compiler when building core0's dts.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		PowerPC,P1020@1 {
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		serial1: serial@4600 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				spi-max-frequency = <40000000>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1>;
+				reg = <0x1>;
+			};
+		};
+
+		mdio@25000 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet0: ethernet@b0000 {
+			status = "disabled";
+		};
+
+		enet1: ethernet@b1000 {
+			phy-handle = <&phy0>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet2: ethernet@b2000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			phy_type = "ulpi";
+		};
+		*/
+
+		mpic: pic@40000 {
+			protected-sources = <
+			42 29 30 34	/* serial1, enet0-queue-group0 */
+			17 18 24 45	/* enet0-queue-group1, crypto */
+			>;
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
new file mode 100644
index 000000000000..6ec02204a44e
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
@@ -0,0 +1,148 @@
+/*
+ * P1020 RDB Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, eth0, crypto.
+ *
+ * Note: pass "-b 1" to dtc when compiling core1's dts.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {	/* core1 view of the board: overrides applied on top of p1020si.dtsi */
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet0 = &enet0;
+		serial0 = &serial1;	/* this image's serial0 alias is the UART labelled serial1 (serial@4600 in p1020si.dtsi) */
+		};
+
+	cpus {
+		PowerPC,P1020@0 {	/* core 0 is disabled in this tree; only PowerPC,P1020@1 stays enabled */
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {	/* nodes marked disabled below are not used by this image */
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {	/* first UART is disabled here; serial@4600 is left enabled */
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
+		};
+
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
+		dma@21300 {
+			status = "disabled";
+		};
+
+		mdio@24000 {
+			status = "disabled";
+		};
+
+		mdio@25000 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@b0000 {	/* the only Ethernet controller left enabled in this tree */
+			fixed-link = <1 1 1000 0 0>;	/* presumably <phy-id duplex speed pause asym-pause>, i.e. full duplex at 1000 Mb/s -- confirm against the fixed-link binding */
+			phy-connection-type = "rgmii-id";
+
+		};
+
+		enet1: ethernet@b1000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@b2000 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		mpic: pic@40000 {
+			protected-sources = <	/* values equal the interrupts of the devices disabled above; presumably sources left to the other core -- confirm */
+			16 		/* ecm, mem, L2, pci0, pci1 */
+			43 42 59	/* i2c, serial0, spi */
+			47 63 62 	/* gpio, tdm */
+			20 21 22 23	/* dma */
+			03 02 		/* mdio */
+			35 36 40	/* enet1-queue-group0 */
+			51 52 67	/* enet1-queue-group1 */
+			31 32 33	/* enet2-queue-group0 */
+			25 26 27	/* enet2-queue-group1 */
+			28 72 58 	/* usb, sdhci, crypto */
+			0xb0 0xb1 0xb2	/* message */
+			0xb3 0xb4 0xb5
+			0xb6 0xb7
+			0xe0 0xe1 0xe2	/* msi */
+			0xe3 0xe4 0xe5
+			0xe6 0xe7		/* msi (cont.); sdhci, crypto and pci sources are listed above */
+			>;
+		};
+
+		msi@41600 {
+			status = "disabled";
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe0a000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi
new file mode 100644
index 000000000000..5c5acb66c3fc
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020si.dtsi
@@ -0,0 +1,377 @@
+/*
+ * P1020si Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P1020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {	/* the two CPU cores of the SoC */
+		#address-cells = <1>;	/* each cpu reg is a single cell (the core number below) */
+		#size-cells = <0>;
+
+		PowerPC,P1020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* refers to the l2-cache-controller@20000 node labelled L2 */
+		};
+
+		PowerPC,P1020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;	/* same L2 node as core 0 */
+		};
+	};
+
+	localbus@ffe05000 {	/* enhanced local bus controller; child devices and ranges are left to the board files */
+		#address-cells = <2>;	/* children use two address cells and one size cell */
+		#size-cells = <1>;
+		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;	/* 0x1000 bytes of registers at 0xffe05000 (two-cell address, two-cell size) */
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {	/* on-chip peripherals; child reg values are offsets into this block */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p1020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;	/* child offset 0 -> parent address 0xffe00000, 0x100000 bytes */
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p1020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <16 2>;	/* source 16 is also used by memory-controller, L2, pci0 and pci1 */
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p1020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;	/* same source as i2c@3100 */
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;	/* same source as i2c@3000 */
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;	/* placeholder; presumably filled in by the boot loader like bus-frequency -- confirm */
+			interrupts = <42 2>;	/* same source as serial1 */
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;	/* placeholder; presumably filled in by the boot loader like bus-frequency -- confirm */
+			interrupts = <42 2>;	/* same source as serial0 */
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {	/* referenced by next-level-cache in both cpu nodes */
+			compatible = "fsl,p1020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x40000>; // L2,256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {	/* DMA controller with four channel child nodes */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;	/* channel offsets 0x0-0x1ff map to 0x21100-0x212ff in the soc block */
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	/* each channel occupies 0x80 bytes */
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		mdio@24000 {	/* no PHY children here */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-mdio";
+			reg = <0x24000 0x1000 0xb0030 0x4>;	/* two register regions: 0x1000 bytes at 0x24000 and 4 bytes at 0xb0030 */
+
+		};
+
+		mdio@25000 {	/* no PHY children here */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-tbi";
+			reg = <0x25000 0x1000 0xb1030 0x4>;	/* two register regions: 0x1000 bytes at 0x25000 and 4 bytes at 0xb1030 */
+
+		};
+
+		enet0: ethernet@b0000 {	/* Ethernet controller with two queue groups */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the boot loader -- confirm */
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb0000 0x1000>;
+				interrupts = <29 2 30 2 34 2>;	/* three interrupts; presumably tx, rx, error -- confirm */
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb4000 0x1000>;
+				interrupts = <17 2 18 2 24 2>;	/* three interrupts; presumably tx, rx, error -- confirm */
+			};
+		};
+
+		enet1: ethernet@b1000 {	/* same layout as enet0 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the boot loader -- confirm */
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb1000 0x1000>;
+				interrupts = <35 2 36 2 40 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb5000 0x1000>;
+				interrupts = <51 2 52 2 67 2>;
+			};
+		};
+
+		enet2: ethernet@b2000 {	/* same layout as enet0 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the boot loader -- confirm */
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb2000 0x1000>;
+				interrupts = <31 2 32 2 33 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb6000 0x1000>;
+				interrupts = <25 2 26 2 27 2>;
+			};
+		};
+
+		usb@22000 {	/* phy_type is not set in this node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <46 0x2>;
+			phy_type = "ulpi";
+		};
+		*/
+
+		sdhci@2e000 {
+			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;	/* two interrupt sources: 45 and 58 */
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {	/* interrupt controller referenced by every interrupt-parent in this file */
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	/* specifiers are two-cell pairs such as <16 2>; second cell presumably the sense encoding -- confirm */
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <	/* eight two-cell specifiers: sources 0xe0 through 0xe7 */
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p1020-guts","fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe09000 {	/* PCIe controller; ranges and interrupt-map are not set in this node */
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;	/* 0x1000 bytes of registers at 0xffe09000 */
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;	/* same source as ecm, memory-controller, L2 and pci1 */
+	};
+
+	pci1: pcie@ffe0a000 {	/* PCIe controller; ranges and interrupt-map are not set in this node */
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;	/* 0x1000 bytes of registers at 0xffe0a000 */
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;	/* same source as ecm, memory-controller, L2 and pci0 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts
index 59ef405c1c91..4f685a779f4c 100644
--- a/arch/powerpc/boot/dts/p1022ds.dts
+++ b/arch/powerpc/boot/dts/p1022ds.dts
@@ -52,7 +52,7 @@
 		#size-cells = <1>;
 		compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus";
 		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
+		interrupts = <19 2 0 0>;
 
 		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000
 			  0x1 0x0 0xf 0xe0000000 0x08000000
@@ -157,7 +157,7 @@
 			 * IRQ8 is generated if the "EVENT" switch is pressed
 			 * and PX_CTL[EVESEL] is set to 00.
 			 */
-			interrupts = <8 8>;
+			interrupts = <8 8 0 0>;
 		};
 	};
 
@@ -178,13 +178,13 @@
 		ecm@1000 {
 			compatible = "fsl,p1022-ecm", "fsl,ecm";
 			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		memory-controller@2000 {
 			compatible = "fsl,p1022-memory-controller";
 			reg = <0x2000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		i2c@3000 {
@@ -193,7 +193,7 @@
 			cell-index = <0>;
 			compatible = "fsl-i2c";
 			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 		};
 
@@ -203,7 +203,7 @@
 			cell-index = <1>;
 			compatible = "fsl-i2c";
 			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 
 			wm8776:codec@1a {
@@ -220,7 +220,7 @@
 			compatible = "ns16550";
 			reg = <0x4500 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		serial1: serial@4600 {
@@ -229,7 +229,7 @@
 			compatible = "ns16550";
 			reg = <0x4600 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		spi@7000 {
@@ -238,7 +238,7 @@
 			#size-cells = <0>;
 			compatible = "fsl,espi";
 			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
+			interrupts = <59 0x2 0 0>;
 			espi,num-ss-bits = <4>;
 			mode = "cpu";
 
@@ -275,7 +275,7 @@
 			compatible = "fsl,mpc8610-ssi";
 			cell-index = <0>;
 			reg = <0x15000 0x100>;
-			interrupts = <75 2>;
+			interrupts = <75 2 0 0>;
 			fsl,mode = "i2s-slave";
 			codec-handle = <&wm8776>;
 			fsl,playback-dma = <&dma00>;
@@ -294,25 +294,25 @@
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <76 2>;
+				interrupts = <76 2 0 0>;
 			};
 			dma01: dma-channel@80 {
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <77 2>;
+				interrupts = <77 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <78 2>;
+				interrupts = <78 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <79 2>;
+				interrupts = <79 2 0 0>;
 			};
 		};
 
@@ -320,7 +320,7 @@
 			#gpio-cells = <2>;
 			compatible = "fsl,mpc8572-gpio";
 			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
+			interrupts = <47 0x2 0 0>;
 			gpio-controller;
 		};
 
@@ -329,7 +329,7 @@
 			reg = <0x20000 0x1000>;
 			cache-line-size = <32>;	// 32 bytes
 			cache-size = <0x40000>; // L2, 256K
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		dma@21300 {
@@ -343,25 +343,25 @@
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <20 2>;
+				interrupts = <20 2 0 0>;
 			};
 			dma-channel@80 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <21 2>;
+				interrupts = <21 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <22 2>;
+				interrupts = <22 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <23 2>;
+				interrupts = <23 2 0 0>;
 			};
 		};
 
@@ -370,7 +370,7 @@
 			#size-cells = <0>;
 			compatible = "fsl-usb2-dr";
 			reg = <0x22000 0x1000>;
-			interrupts = <28 0x2>;
+			interrupts = <28 0x2 0 0>;
 			phy_type = "ulpi";
 		};
 
@@ -381,11 +381,11 @@
 			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
-				interrupts = <3 1>;
+				interrupts = <3 1 0 0>;
 				reg = <0x1>;
 			};
 			phy1: ethernet-phy@1 {
-				interrupts = <9 1>;
+				interrupts = <9 1 0 0>;
 				reg = <0x2>;
 			};
 		};
@@ -416,13 +416,13 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
+				interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
+				interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>;
 			};
 		};
 
@@ -443,20 +443,20 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
+				interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
+				interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>;
 			};
 		};
 
 		sdhci@2e000 {
 			compatible = "fsl,p1022-esdhc", "fsl,esdhc";
 			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
+			interrupts = <72 0x2 0 0>;
 			fsl,sdhci-auto-cmd12;
 			/* Filled in by U-Boot */
 			clock-frequency = <0>;
@@ -467,7 +467,7 @@
 				     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
 				     "fsl,sec2.0";
 			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
+			interrupts = <45 2 0 0 58 2 0 0>;
 			fsl,num-channels = <4>;
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x97c>;
@@ -478,14 +478,14 @@
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x18000 0x1000>;
 			cell-index = <1>;
-			interrupts = <74 0x2>;
+			interrupts = <74 0x2 0 0>;
 		};
 
 		sata@19000 {
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x19000 0x1000>;
 			cell-index = <2>;
-			interrupts = <41 0x2>;
+			interrupts = <41 0x2 0 0>;
 		};
 
 		power@e0070{
@@ -496,21 +496,33 @@
 		display@10000 {
 			compatible = "fsl,diu", "fsl,p1022-diu";
 			reg = <0x10000 1000>;
-			interrupts = <64 2>;
+			interrupts = <64 2 0 0>;
 		};
 
 		timer@41100 {
 			compatible = "fsl,mpic-global-timer";
-			reg = <0x41100 0x204>;
-			interrupts = <0xf7 0x2>;
+			reg = <0x41100 0x100 0x41300 4>;
+			interrupts = <0 0 3 0
+			              1 0 3 0
+			              2 0 3 0
+			              3 0 3 0>;
+		};
+
+		timer@42100 {
+			compatible = "fsl,mpic-global-timer";
+			reg = <0x42100 0x100 0x42300 4>;
+			interrupts = <4 0 3 0
+			              5 0 3 0
+			              6 0 3 0
+			              7 0 3 0>;
 		};
 
 		mpic: pic@40000 {
 			interrupt-controller;
 			#address-cells = <0>;
-			#interrupt-cells = <2>;
+			#interrupt-cells = <4>;
 			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
+			compatible = "fsl,mpic";
 			device_type = "open-pic";
 		};
 
@@ -519,14 +531,14 @@
 			reg = <0x41600 0x80>;
 			msi-available-ranges = <0 0x100>;
 			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
+				0xe0 0 0 0
+				0xe1 0 0 0
+				0xe2 0 0 0
+				0xe3 0 0 0
+				0xe4 0 0 0
+				0xe5 0 0 0
+				0xe6 0 0 0
+				0xe7 0 0 0>;
 		};
 
 		global-utilities@e0000 {	//global utilities block
@@ -547,7 +559,7 @@
 		ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -582,7 +594,7 @@
 		ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -618,7 +630,7 @@
 		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
index 11019142813c..2bcf3683d223 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -1,7 +1,7 @@
 /*
  * P2020 DS Device Tree Source
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020DS";
 	compatible = "fsl,P2020DS";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -27,35 +26,13 @@
 		pci2 = &pci2;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
 
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
 		compatible = "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
-
 		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
 			  0x1 0x0 0x0 0xe0000000 0x08000000
 			  0x2 0x0 0x0 0xffa00000 0x00040000
@@ -158,352 +135,77 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0 0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
-		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
 
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		spi@7000 {
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
+				interrupts = <3 1>;
+				reg = <0x0>;
 			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
+			phy2: ethernet-phy@2 {
 				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
+				interrupts = <3 1>;
+				reg = <0x2>;
 			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
 		};
 
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2, 512k
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		mdio@25520 {
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-				phy2: ethernet-phy@2 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x2>;
-				};
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi1: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi2: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
 		};
 
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
 
 		msi@41600 {
 			compatible = "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
 		};
 	};
 
 	pci0: pcie@ffe08000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe08000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <24 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -528,18 +230,8 @@
 	};
 
 	pci1: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
 		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
 		interrupt-map = <
 
@@ -667,18 +359,8 @@
 	};
 
 	pci2: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index e2d48fd4416e..3782a58f13be 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR and NAND Flashes */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,90 +141,16 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
+	spi@7000 {
 
-		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
-
-			fsl_m25p80@0 {
+		fsl_m25p80@0 {
 				#address-cells = <1>;
 				#size-cells = <1>;
 				compatible = "fsl,espi-flash";
@@ -294,254 +196,68 @@
 			};
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
-			};
-		};
-
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+				};
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
+				};
+		};
+
+		mdio@25520 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
 		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
+		mdio@26520 {
+			status = "disabled";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
+	};
 
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
+	pci0: pcie@ffe08000 {
+		status = "disabled";
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
-		pcie@0 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+			pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
 			#address-cells = <3>;
@@ -556,19 +272,17 @@
 		};
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
index b69c3a5dc858..fc8ddddfccb6 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -14,12 +14,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet1 = &enet1;
@@ -29,91 +28,33 @@
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@1 {
+		status = "disabled";
 		};
+
 	};
 
 	memory {
 		device_type = "memory";
 	};
 
-	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
+	localbus@ffe05000 {
+		status = "disabled";
+	};
 
+	soc@ffe00000 {
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
+		serial1: serial@4600 {
+			status = "disabled";
 		};
 
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -161,76 +102,15 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
+		dma@c300 {
+			status = "disabled";
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
 		mdio@24520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-mdio";
-			reg = <0x24520 0x20>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -245,29 +125,21 @@
 		};
 
 		mdio@25520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-tbi";
-			reg = <0x26520 0x20>;
-
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
 				device_type = "tbi-phy";
 			};
 		};
 
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@24000 {
+			status = "disabled";
+		};
+
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
@@ -275,49 +147,12 @@
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
 
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			42 76 77 78 79 /* serial1 , dma2 */
 			29 30 34 26 /* enet0, pci1 */
@@ -326,26 +161,28 @@
 			>;
 		};
 
-		global-utilities@e0000 {
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
+		msi@41600 {
+			status = "disabled";
 		};
+
+
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -360,4 +197,8 @@
 				  0x0 0x100000>;
 		};
 	};
+
+	pci2: pcie@ffe0a000 {
+		status = "disabled";
+	};
 };
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
index 7a31d46c01b0..261c34ba45ec 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -15,27 +15,21 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
-		serial0 = &serial0;
+		serial0 = &serial1;
 		pci1 = &pci1;
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@0 {
+		status = "disabled";
 		};
 	};
 
@@ -43,20 +37,37 @@
 		device_type = "memory";
 	};
 
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		serial0: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
 		};
 
 		dma@c300 {
@@ -96,6 +107,10 @@
 			};
 		};
 
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
 		L2: l2-cache-controller@20000 {
 			compatible = "fsl,p2020-l2-cache-controller";
 			reg = <0x20000 0x1000>;
@@ -104,31 +119,49 @@
 			interrupt-parent = <&mpic>;
 		};
 
+		dma@21300 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		mdio@24520 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
 		};
 
+		enet1: ethernet@25000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@26000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		crypto@30000 {
+			status = "disabled";
+		};
+
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */
 			16 20 21 22 23 28 	/* L2, dma1, USB */
@@ -152,21 +185,32 @@
 				0xe7 0>;
 			interrupt-parent = <&mpic>;
 		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi
new file mode 100644
index 000000000000..6def17f265d3
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020si.dtsi
@@ -0,0 +1,382 @@
+/*
+ * P2020 Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P2020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p2020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p2020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2,512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-mdio";
+			reg = <0x24520 0x20>;
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;	// must match the unit address
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;	// soc-relative, matches the unit address
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe08000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe08000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+	};
+
+	pci1: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+	};
+
+	pci2: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+	};
+};
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
new file mode 100644
index 000000000000..06c1961bd124
--- /dev/null
+++ b/arch/powerpc/boot/epapr.c
@@ -0,0 +1,66 @@
+/*
+ * Bootwrapper for ePAPR compliant firmwares
+ *
+ * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation.
+ *
+ * Based on earlier bootwrappers by:
+ * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,
+ *   and
+ * Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+
+BSS_STACK(4096);
+
+#define EPAPR_SMAGIC	0x65504150
+#define EPAPR_EMAGIC	0x45504150
+
+static unsigned epapr_magic;
+static unsigned long ima_size;
+static unsigned long fdt_addr;
+
+/* Sanity-check the values saved by platform_init(). */
+static void platform_fixups(void)
+{
+	if (epapr_magic != EPAPR_EMAGIC && epapr_magic != EPAPR_SMAGIC)
+		fatal("r6 contained 0x%08x instead of ePAPR magic number\n",
+		      epapr_magic);
+
+	if (ima_size < (unsigned long)_end)
+		printf("WARNING: Image loaded outside IMA!"
+		       " (_end=%p, ima_size=0x%lx)\n", _end, ima_size);
+	if (ima_size < fdt_addr)
+		printf("WARNING: Device tree address is outside IMA!"
+		       " (fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr,
+		       ima_size);
+	if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr))
+		printf("WARNING: Device tree extends outside IMA!"
+		       " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx)\n",
+		       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	epapr_magic = r6;
+	ima_size = r7;
+	fdt_addr = r3;
+	/* r4/r5 are unused; the saved values are checked in platform_fixups() */
+	/* FIXME: we should process reserve entries */
+	/* NOTE(review): first two args look like heap base and size - confirm */
+	simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64);
+
+	fdt_init((void *)fdt_addr);
+
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cb97e7511d7e..c74531af72c0 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -39,6 +39,7 @@ dts=
 cacheit=
 binary=
 gzip=.gz
+pie=
 
 # cross-compilation prefix
 CROSS=
@@ -157,9 +158,10 @@ pmac|chrp)
     platformo=$object/of.o
     ;;
 coff)
-    platformo=$object/of.o
+    platformo="$object/crt0.o $object/of.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
+    pie=
     ;;
 miboot|uboot)
     # miboot and U-boot want just the bare bits, not an ELF binary
@@ -208,6 +210,7 @@ ps3)
     ksection=.kernel:vmlinux.bin
     isection=.kernel:initrd
     link_address=''
+    pie=
     ;;
 ep88xc|ep405|ep8248e)
     platformo="$object/fixed-head.o $object/$platform.o"
@@ -244,6 +247,10 @@ gamecube|wii)
 treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
+epapr)
+    link_address='0x20000000'
+    pie=-pie
+    ;;
 esac
 
 vmz="$tmpdir/`basename \"$kernel\"`.$ext"
@@ -251,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
     ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
 
     if [ -n "$gzip" ]; then
-        gzip -f -9 "$vmz.$$"
+        gzip -n -f -9 "$vmz.$$"
     fi
 
     if [ -n "$cacheit" ]; then
@@ -310,9 +317,9 @@ fi
 
 if [ "$platform" != "miboot" ]; then
     if [ -n "$link_address" ] ; then
-        text_start="-Ttext $link_address --defsym _start=$link_address"
+        text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \
+    ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
@@ -336,7 +343,7 @@ coff)
     $objbin/hack-coff "$ofile"
     ;;
 cuboot*)
-    gzip -f -9 "$ofile"
+    gzip -n -f -9 "$ofile"
     ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \
             $uboot_version -d "$ofile".gz "$ofile"
     ;;
@@ -383,6 +390,6 @@ ps3)
 
     odir="$(dirname "$ofile.bin")"
     rm -f "$odir/otheros.bld"
-    gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
     ;;
 esac
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
index 856dc78b14ef..de4c9e3c9344 100644
--- a/arch/powerpc/boot/zImage.coff.lds.S
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd)
 EXTERN(_zimage_start_opd)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
@@ -17,9 +17,7 @@ SECTIONS
     *(.data*)
     *(__builtin_*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
 
     _dtb_start = .;
     *(.kernel:dtb)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 0962d62bdb50..2bd8731f1365 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -3,49 +3,64 @@ ENTRY(_zimage_start)
 EXTERN(_zimage_start)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
     *(.rodata*)
     *(.data*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
   }
+  .dynsym : { *(.dynsym) }
+  .dynstr : { *(.dynstr) }
+  .dynamic :
+  {
+    __dynamic_start = .;
+    *(.dynamic)
+  }
+  .hash : { *(.hash) }
+  .interp : { *(.interp) }
+  .rela.dyn : { *(.rela*) }
 
   . = ALIGN(8);
-  _dtb_start = .;
-  .kernel:dtb : { *(.kernel:dtb) }
-  _dtb_end = .;
-
-  . = ALIGN(4096);
-  _vmlinux_start =  .;
-  .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) }
-  _vmlinux_end =  .;
+  .kernel:dtb :
+  {
+    _dtb_start = .;
+    *(.kernel:dtb)
+    _dtb_end = .;
+  }
 
   . = ALIGN(4096);
-  _initrd_start =  .;
-  .kernel:initrd : { *(.kernel:initrd) }
-  _initrd_end =  .;
+  .kernel:vmlinux.strip :
+  {
+    _vmlinux_start =  .;
+    *(.kernel:vmlinux.strip)
+    _vmlinux_end =  .;
+  }
 
   . = ALIGN(4096);
-  _edata  =  .;
+  .kernel:initrd :
+  {
+    _initrd_start =  .;
+    *(.kernel:initrd)
+    _initrd_end =  .;
+  }
 
   . = ALIGN(4096);
-  __bss_start = .;
   .bss       :
   {
-   *(.sbss)
-   *(.bss)
+    _edata  =  .;
+    __bss_start = .;
+    *(.sbss)
+    *(.bss)
+    *(COMMON)
+    _end = . ;
   }
-  . = ALIGN(4096);
-  _end = . ;
 }
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index c683bce4c26e..126ef1b08a01 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index a721cd3d793f..abcf00ad939e 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 55e0725500dc..11662c217ac0 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index d724095530a6..ebe9b30b0721 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 4b44beaa21ae..eb25229b387a 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
index b614508d6fd2..f51c7ebc181e 100644
--- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
+++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
@@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index f9e6a3ea5a64..2a84fd7f631c 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_IND=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig
index 9fa1613e5e2b..d32283555b53 100644
--- a/arch/powerpc/configs/e55xx_smp_defconfig
+++ b/arch/powerpc/configs/e55xx_smp_defconfig
@@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SPARSE_IRQ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
@@ -25,8 +25,32 @@ CONFIG_P5020_DS=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
 # CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_ARPD=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
@@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_LEGACY=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -64,22 +91,14 @@ CONFIG_NLS=y
 CONFIG_NLS_UTF8=m
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC_ITU_T=m
-CONFIG_LIBCRC32C=m
 CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_VIRQ_DEBUG=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index c06a86c33098..96b89df7752a 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 942ced90557c..de65841aa04e 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig
index 038a308cbfc4..a1cc8179e9fd 100644
--- a/arch/powerpc/configs/mpc86xx_defconfig
+++ b/arch/powerpc/configs/mpc86xx_defconfig
@@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index ac4fc41035f6..f8b394a76ac3 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y
 CONFIG_IRDA_FAST_RR=y
 CONFIG_IRTTY_SIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 0a10fb009ef7..214208924a9c 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m
 CONFIG_VIA_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index caba919f65d8..6472322bf13b 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y
 # CONFIG_INET_DIAG is not set
 CONFIG_IPV6=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 249ddd0a27cd..7de13865508c 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_SAS_ATTRS=m
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
 CONFIG_SCSI_IBMVSCSI=y
 CONFIG_SCSI_IBMVFC=m
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
 CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
 CONFIG_ATA=y
 # CONFIG_ATA_SFF is not set
@@ -197,6 +203,8 @@ CONFIG_S2IO=m
 CONFIG_MYRI10GE=m
 CONFIG_NETXEN_NIC=m
 CONFIG_MLX4_EN=m
+CONFIG_QLGE=m
+CONFIG_BE2NET=m
 CONFIG_PPP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index be3cdf9134ce..c0d842cfd012 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -157,6 +157,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_476_DD2			ASM_CONST(0x0000000000010000)
 #define CPU_FTR_NEED_COHERENT		ASM_CONST(0x0000000000020000)
 #define CPU_FTR_NO_BTIC			ASM_CONST(0x0000000000040000)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x0000000000080000)
 #define CPU_FTR_NODSISRALIGN		ASM_CONST(0x0000000000100000)
 #define CPU_FTR_PPC_LE			ASM_CONST(0x0000000000200000)
 #define CPU_FTR_REAL_LE			ASM_CONST(0x0000000000400000)
@@ -178,22 +179,18 @@ extern const char *powerpc_base_platform;
 #define LONG_ASM_CONST(x)		0
 #endif
 
-#define CPU_FTR_SLB			LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_16M_PAGE		LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_TLBIEL			LONG_ASM_CONST(0x0000000400000000)
+
+#define CPU_FTR_HVMODE_206		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000001000000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
 #define CPU_FTR_SMT			LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_LOCKLESS_TLBIE		LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_CI_LARGE_PAGE		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_PURR			LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000800000000000)
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_1T_SEGMENT		LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_NO_SLBIE_B		LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_VSX			LONG_ASM_CONST(0x0010000000000000)
 #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
@@ -202,12 +199,14 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
+#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x1000000000000000)
 
 #ifndef __ASSEMBLY__
 
-#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_SLB | \
-				 CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
-				 CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE)
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
+#define MMU_FTR_PPCAS_ARCH_V2 	(MMU_FTR_SLB | MMU_FTR_TLBIEL | \
+				 MMU_FTR_16M_PAGE)
 
 /* We only set the altivec features if the kernel was compiled with altivec
  * support
@@ -382,10 +381,13 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
 	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
-	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
+#define CPU_FTRS_E500MC	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL)
+#define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DEBUG_LVL_EXC)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
@@ -405,41 +407,46 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
-	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
-	    CPU_FTR_POPCNTB)
+	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
 #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR)
 #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_ICSWX | CPU_FTR_CFAR)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
+	    CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
 	    CPU_FTR_UNALIGNED_LD_STD)
 #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | \
-	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B)
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
+#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
+		     CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500 | CPU_FTRS_A2)
+#else
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
 	    CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |		\
-	    CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+	    CPU_FTR_VSX)
+#endif
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
@@ -473,16 +480,21 @@ enum {
 #endif
 #ifdef CONFIG_E500
 	    CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
+	    CPU_FTRS_E5500 |
 #endif
 	    0,
 };
 #endif /* __powerpc64__ */
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500 & CPU_FTRS_A2)
+#else
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
 	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
 	    CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+#endif
 #else
 enum {
 	CPU_FTRS_ALWAYS =
@@ -513,6 +525,7 @@ enum {
 #endif
 #ifdef CONFIG_E500
 	    CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
+	    CPU_FTRS_E5500 &
 #endif
 	    CPU_FTRS_POSSIBLE,
 };
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index f71bb4c118b4..ce516e5eb0d3 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask;
  * This can typically be used for things like IPI for tlb invalidations
  * since those need to be done only once per core/TLB
  */
-static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads)
+static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 {
 	cpumask_t	tmp, res;
 	int		i;
 
-	res = CPU_MASK_NONE;
+	cpumask_clear(&res);
 	for (i = 0; i < NR_CPUS; i += threads_per_core) {
-		cpus_shift_left(tmp, threads_core_mask, i);
-		if (cpus_intersects(threads, tmp))
-			cpu_set(i, res);
+		cpumask_shift_left(&tmp, &threads_core_mask, i);
+		if (cpumask_intersects(threads, &tmp))
+			cpumask_set_cpu(i, &res);
 	}
 	return res;
 }
@@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void)
 
 static inline cpumask_t cpu_online_cores_map(void)
 {
-	return cpu_thread_mask_to_cores(cpu_online_map);
+	return cpu_thread_mask_to_cores(cpu_online_mask);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 0893ab9343a6..9c70d0ca96d4 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -27,9 +27,8 @@ enum ppc_dbell {
 	PPC_G_DBELL_MC = 4,	/* guest mcheck doorbell */
 };
 
-extern void doorbell_message_pass(int target, int msg);
+extern void doorbell_cause_ipi(int cpu, unsigned long data);
 extern void doorbell_exception(struct pt_regs *regs);
-extern void doorbell_check_self(void);
 extern void doorbell_setup_this_cpu(void);
 
 static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f0fb4fc1f6e6..45921672b97a 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -52,6 +52,10 @@ extern struct ppc_emulated {
 #ifdef CONFIG_VSX
 	struct ppc_emulated_entry vsx;
 #endif
+#ifdef CONFIG_PPC64
+	struct ppc_emulated_entry mfdscr;
+	struct ppc_emulated_entry mtdscr;
+#endif
 } ppc_emulated;
 
 extern u32 ppc_warn_emulated;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7778d6f0c878..f5dfe3411f64 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -46,6 +46,7 @@
 #define EX_CCR		60
 #define EX_R3		64
 #define EX_LR		72
+#define EX_CFAR		80
 
 /*
  * We're short on space and time in the exception prolog, so we can't
@@ -56,30 +57,40 @@
 #define LOAD_HANDLER(reg, label)					\
 	addi	reg,reg,(label)-_stext;	/* virt addr of handler ... */
 
-#define EXCEPTION_PROLOG_1(area)				\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+/* Exception register prefixes */
+#define EXC_HV	H
+#define EXC_STD
+
+#define EXCEPTION_PROLOG_1(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
 	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG_SCRATCH0;					\
+	BEGIN_FTR_SECTION_NESTED(66);					\
+	mfspr	r10,SPRN_CFAR;						\
+	std	r10,area+EX_CFAR(r13);					\
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		\
+	GET_SCRATCH0(r9);						\
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
 
-#define EXCEPTION_PROLOG_PSERIES_1(label)				\
+#define __EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label)						\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
+	__EXCEPTION_PROLOG_PSERIES_1(label, h)
 
-#define EXCEPTION_PROLOG_PSERIES(area, label)				\
+#define EXCEPTION_PROLOG_PSERIES(area, label, h)			\
 	EXCEPTION_PROLOG_1(area);					\
-	EXCEPTION_PROLOG_PSERIES_1(label);
+	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
 /*
  * The common exception prolog is used for all except a few exceptions
@@ -98,10 +109,11 @@
 	beq-	1f;							   \
 	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
 1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
-	bge-	cr1,2f;			/* abort if it is		*/ \
-	b	3f;							   \
-2:	li	r1,(n);			/* will be reloaded later	*/ \
+	blt+	cr1,3f;			/* continue if it isn't		*/ \
+	li	r1,(n);			/* will be reloaded later	*/ \
 	sth	r1,PACA_TRAP_SAVE(r13);					   \
+	std	r3,area+EX_R3(r13);					   \
+	addi	r3,r13,area;		/* r3 -> where regs are saved*/	   \
 	b	bad_stack;						   \
 3:	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
 	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
@@ -123,6 +135,10 @@
 	std	r9,GPR11(r1);						   \
 	std	r10,GPR12(r1);						   \
 	std	r11,GPR13(r1);						   \
+	BEGIN_FTR_SECTION_NESTED(66);					   \
+	ld	r10,area+EX_CFAR(r13);					   \
+	std	r10,ORIG_GPR3(r1);					   \
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
 	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
 	mflr	r9;			/* save LR in stackframe	*/ \
 	std	r9,_LINK(r1);						   \
@@ -143,57 +159,62 @@
 /*
  * Exception vectors.
  */
-#define STD_EXCEPTION_PSERIES(n, label)			\
-	. = n;						\
+#define STD_EXCEPTION_PSERIES(loc, vec, label)		\
+	. = loc;					\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	HMT_MEDIUM;					\
-	DO_KVM	n;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	DO_KVM	vec;					\
+	SET_SCRATCH0(r13);		/* save r13 */		\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
 
-#define HSTD_EXCEPTION_PSERIES(n, label)		\
-	. = n;						\
-	.globl label##_pSeries;				\
-label##_pSeries:					\
+#define STD_EXCEPTION_HV(loc, vec, label)		\
+	. = loc;					\
+	.globl label##_hv;				\
+label##_hv:						\
 	HMT_MEDIUM;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r20;	/* save r20 */	\
-	mfspr	r20,SPRN_HSRR0;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR0,r20;				\
-	mfspr	r20,SPRN_HSRR1;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR1,r20;				\
-	mfspr	r20,SPRN_SPRG_SCRATCH0;	/* restore r20 */ \
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	DO_KVM	vec;					\
+	SET_SCRATCH0(r13);	/* save r13 */		\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
 
-
-#define MASKABLE_EXCEPTION_PSERIES(n, label)				\
-	. = n;								\
-	.globl label##_pSeries;						\
-label##_pSeries:							\
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
 	HMT_MEDIUM;							\
-	DO_KVM	n;							\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+	DO_KVM	vec;							\
+	SET_SCRATCH0(r13);    /* save r13 */				\
+	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	lbz	r10,PACASOFTIRQEN(r13);					\
 	mfcr	r9;							\
 	cmpwi	r10,0;							\
-	beq	masked_interrupt;					\
-	mfspr	r10,SPRN_SPRG_SCRATCH0;					\
+	beq	masked_##h##interrupt;					\
+	GET_SCRATCH0(r10);						\
 	std	r10,PACA_EXGEN+EX_R13(r13);				\
 	std	r11,PACA_EXGEN+EX_R11(r13);				\
 	std	r12,PACA_EXGEN+EX_R12(r13);				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label##_common)				\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
+	__MASKABLE_EXCEPTION_PSERIES(vec, label, h)
+
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label)			\
+	. = loc;							\
+	.globl label##_pSeries;						\
+label##_pSeries:							\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD)
+
+#define MASKABLE_EXCEPTION_HV(loc, vec, label)				\
+	. = loc;							\
+	.globl label##_hv;						\
+label##_hv:								\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV)
 
 #ifdef CONFIG_PPC_ISERIES
 #define DISABLE_INTS				\
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 921a8470e18a..9a67a38bf7b9 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -49,7 +49,7 @@ label##5:							\
 	FTR_ENTRY_OFFSET label##2b-label##5b;			\
 	FTR_ENTRY_OFFSET label##3b-label##5b;			\
 	FTR_ENTRY_OFFSET label##4b-label##5b;			\
-	.ifgt (label##4b-label##3b)-(label##2b-label##1b);	\
+	.ifgt (label##4b- label##3b)-(label##2b- label##1b);	\
 	.error "Feature section else case larger than body";	\
 	.endif;							\
 	.popsection;
@@ -146,6 +146,19 @@ label##5:							\
 
 #ifndef __ASSEMBLY__
 
+#define ASM_FTR_IF(section_if, section_else, msk, val)	\
+	stringify_in_c(BEGIN_FTR_SECTION)			\
+	section_if "; "						\
+	stringify_in_c(FTR_SECTION_ELSE)			\
+	section_else "; "					\
+	stringify_in_c(ALT_FTR_SECTION_END((msk), (val)))
+
+#define ASM_FTR_IFSET(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_FTR_IFCLR(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), 0)
+
 #define ASM_MMU_FTR_IF(section_if, section_else, msk, val)	\
 	stringify_in_c(BEGIN_MMU_FTR_SECTION)			\
 	section_if "; "						\
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 4ef662e4a31d..3a6c586c4e40 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -47,6 +47,7 @@
 #define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
+#define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
 
 #ifndef __ASSEMBLY__
 
@@ -60,7 +61,7 @@ enum {
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
 		FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
-		FW_FEATURE_CMO | FW_FEATURE_VPHN,
+		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
 	FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8edec710cc6d..852b8c1c09db 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -102,6 +102,7 @@
 #define H_ANDCOND		(1UL<<(63-33))
 #define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
 #define H_ICACHE_SYNCHRONIZE	(1UL<<(63-41))	/* dcbst, icbi, etc (ignored for IO pages */
+#define H_COALESCE_CAND	(1UL<<(63-42))	/* page is a good candidate for coalescing */
 #define H_ZERO_PAGE		(1UL<<(63-48))	/* zero the page before mapping (ignored for IO pages) */
 #define H_COPY_PAGE		(1UL<<(63-49))
 #define H_N			(1UL<<(63-61))
@@ -234,6 +235,7 @@
 #define H_GET_MPP		0x2D4
 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC
 #define H_BEST_ENERGY		0x2F4
+#define H_GET_MPP_X		0x314
 #define MAX_HCALL_OPCODE	H_BEST_ENERGY
 
 #ifndef __ASSEMBLY__
@@ -312,6 +314,16 @@ struct hvcall_mpp_data {
 
 int h_get_mpp(struct hvcall_mpp_data *);
 
+struct hvcall_mpp_x_data {
+	unsigned long coalesced_bytes;
+	unsigned long pool_coalesced_bytes;
+	unsigned long pool_purr_cycles;
+	unsigned long pool_spurr_cycles;
+	unsigned long reserved[3];
+};
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+
 #ifdef CONFIG_PPC_PSERIES
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 6efc7782ebf2..fbae49286926 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -31,7 +31,6 @@ struct iowa_bus {
 	void   *private;
 };
 
-void __devinit io_workaround_init(void);
 void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
 				 int (*)(struct iowa_bus *, void *), void *);
 struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 001f2f11c19b..45698d55cd6a 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -2,6 +2,8 @@
 #define _ASM_POWERPC_IO_H
 #ifdef __KERNEL__
 
+#define ARCH_HAS_IOREMAP_WC
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -481,10 +483,16 @@ __do_out_asm(_rec_outl, "stwbrx")
 				_memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
 #endif /* !CONFIG_EEH */
 
-#ifdef CONFIG_PPC_INDIRECT_IO
-#define DEF_PCI_HOOK(x)		x
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK_pio(x)	x
+#else
+#define DEF_PCI_HOOK_pio(x)	NULL
+#endif
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define DEF_PCI_HOOK_mem(x)	x
 #else
-#define DEF_PCI_HOOK(x)		NULL
+#define DEF_PCI_HOOK_mem(x)	NULL
 #endif
 
 /* Structure containing all the hooks */
@@ -504,7 +512,7 @@ extern struct ppc_pci_io {
 #define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
 static inline ret name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)	\
 		return ppc_pci_io.name al;			\
 	return __do_##name al;					\
 }
@@ -512,7 +520,7 @@ static inline ret name at					\
 #define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
 static inline void name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)		\
 		ppc_pci_io.name al;				\
 	else							\
 		__do_##name al;					\
@@ -616,12 +624,13 @@ static inline void iosync(void)
  * * ioremap is the standard one and provides non-cacheable guarded mappings
  *   and can be hooked by the platform via ppc_md
  *
- * * ioremap_flags allows to specify the page flags as an argument and can
- *   also be hooked by the platform via ppc_md. ioremap_prot is the exact
- *   same thing as ioremap_flags.
+ * * ioremap_prot allows the page flags to be specified as an argument and
+ *   can also be hooked by the platform via ppc_md.
  *
  * * ioremap_nocache is identical to ioremap
  *
+ * * ioremap_wc enables write combining
+ *
  * * iounmap undoes such a mapping and can be hooked
  *
  * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -629,7 +638,7 @@ static inline void iosync(void)
  *   currently be hooked. Must be page aligned.
  *
  * * __ioremap is the low level implementation used by ioremap and
- *   ioremap_flags and cannot be hooked (but can be used by a hook on one
+ *   ioremap_prot and cannot be hooked (but can be used by a hook on one
  *   of the previous ones)
  *
  * * __ioremap_caller is the same as above but takes an explicit caller
@@ -640,10 +649,10 @@ static inline void iosync(void)
  *
  */
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
-				   unsigned long flags);
+extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
+				  unsigned long flags);
+extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
-#define ioremap_prot(addr, size, prot)	ioremap_flags((addr), (size), (prot))
 
 extern void iounmap(volatile void __iomem *addr);
 
diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
new file mode 100644
index 000000000000..b1a9a1be3c21
--- /dev/null
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
+#define _ASM_POWERPC_IO_EVENT_IRQ_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+
+#define PSERIES_IOEI_RPC_MAX_LEN 216
+
+#define PSERIES_IOEI_TYPE_ERR_DETECTED		0x01
+#define PSERIES_IOEI_TYPE_ERR_RECOVERED		0x02
+#define PSERIES_IOEI_TYPE_EVENT			0x03
+#define PSERIES_IOEI_TYPE_RPC_PASS_THRU		0x04
+
+#define PSERIES_IOEI_SUBTYPE_NOT_APP		0x00
+#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ	0x01
+#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE	0x03
+#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE	0x04
+#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE	0x05
+#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE	0x06
+#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED	0x07
+
+#define PSERIES_IOEI_SCOPE_NOT_APP		0x00
+#define PSERIES_IOEI_SCOPE_RIO_HUB		0x36
+#define PSERIES_IOEI_SCOPE_RIO_BRIDGE		0x37
+#define PSERIES_IOEI_SCOPE_PHB			0x38
+#define PSERIES_IOEI_SCOPE_EADS_GLOBAL		0x39
+#define PSERIES_IOEI_SCOPE_EADS_SLOT		0x3A
+#define PSERIES_IOEI_SCOPE_TORRENT_HUB		0x3B
+#define PSERIES_IOEI_SCOPE_SERVICE_PROC		0x51
+
+/* Platform Event Log Format, Version 6, data portion of IO event section */
+struct pseries_io_event {
+	uint8_t event_type;		/* 0x00 IO-Event Type		*/
+	uint8_t rpc_data_len;		/* 0x01 RPC data length		*/
+	uint8_t scope;			/* 0x02 Error/Event Scope	*/
+	uint8_t event_subtype;		/* 0x03 I/O-Event Sub-Type	*/
+	uint32_t drc_index;		/* 0x04 DRC Index		*/
+	uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN];
+					/* 0x08 RPC Data (0-216 bytes,	*/
+					/* padded to 4 bytes alignment)	*/
+};
+
+extern struct atomic_notifier_head pseries_ioei_notifier_list;
+
+#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 67ab5fb7d153..1bff591f7f72 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -88,9 +88,6 @@ struct irq_host_ops {
 	/* Dispose of such a mapping */
 	void (*unmap)(struct irq_host *h, unsigned int virq);
 
-	/* Update of such a mapping  */
-	void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
 	/* Translate device-tree interrupt specifier from raw format coming
 	 * from the firmware to a irq_hw_number_t (interrupt line number) and
 	 * type (sense) that can be passed to set_irq_type(). In the absence
@@ -128,19 +125,10 @@ struct irq_host {
 	struct device_node	*of_node;
 };
 
-/* The main irq map itself is an array of NR_IRQ entries containing the
- * associate host and irq number. An entry with a host of NULL is free.
- * An entry can be allocated if it's free, the allocator always then sets
- * hwirq first to the host's invalid irq number and then fills ops.
- */
-struct irq_map_entry {
-	irq_hw_number_t	hwirq;
-	struct irq_host	*host;
-};
-
-extern struct irq_map_entry irq_map[NR_IRQS];
-
+struct irq_data;
+extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
+extern bool virq_is_host(unsigned int virq, struct irq_host *host);
 
 /**
  * irq_alloc_host - Allocate a new irq_host data structure
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f54408d995b5..8a33698c61bd 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
 extern cpumask_t cpus_in_sr;
 static inline int kexec_sr_activated(int cpu)
 {
-	return cpu_isset(cpu,cpus_in_sr);
+	return cpumask_test_cpu(cpu, &cpus_in_sr);
 }
 
 struct kimage;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5b7504674397..0951b17f4eb5 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -59,6 +59,7 @@
 #define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
 #define BOOK3S_INTERRUPT_EXTERNAL	0x500
 #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL	0x501
+#define BOOK3S_INTERRUPT_EXTERNAL_HV	0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT	0x600
 #define BOOK3S_INTERRUPT_PROGRAM	0x700
 #define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 36fdb3aff30b..d5a8a3861635 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -34,6 +34,7 @@
 	    (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
 	    (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_PROGRAM) || \
 	    (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index a077adc0b35e..e0298d26ce5d 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -210,6 +210,8 @@ struct dtl_entry {
 #define DISPATCH_LOG_BYTES	4096	/* bytes per cpu */
 #define N_DISPATCH_LOG		(DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
 
+extern struct kmem_cache *dtl_cache;
+
 /*
  * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e4f01915fbb0..47cacddb14cf 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,21 +29,6 @@ struct file;
 struct pci_controller;
 struct kimage;
 
-#ifdef CONFIG_SMP
-struct smp_ops_t {
-	void  (*message_pass)(int target, int msg);
-	int   (*probe)(void);
-	void  (*kick_cpu)(int nr);
-	void  (*setup_cpu)(int nr);
-	void  (*bringup_done)(void);
-	void  (*take_timebase)(void);
-	void  (*give_timebase)(void);
-	int   (*cpu_disable)(void);
-	void  (*cpu_die)(unsigned int nr);
-	int   (*cpu_bootable)(unsigned int nr);
-};
-#endif
-
 struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
@@ -267,6 +252,7 @@ struct machdep_calls {
 
 extern void e500_idle(void);
 extern void power4_idle(void);
+extern void power7_idle(void);
 extern void ppc6xx_idle(void);
 extern void book3e_idle(void);
 
@@ -311,12 +297,6 @@ extern sys_ctrler_t sys_ctrler;
 
 #endif /* CONFIG_PPC_PMAC */
 
-#ifdef CONFIG_SMP
-/* Poor default implementations */
-extern void __devinit smp_generic_give_timebase(void);
-extern void __devinit smp_generic_take_timebase(void);
-#endif /* CONFIG_SMP */
-
 
 /* Functions to produce codes on the leds.
  * The SRC code should be unique for the message category and should
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 17194fcd4040..3ea0f9a259d8 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -43,6 +43,7 @@
 #define MAS0_TLBSEL(x)		(((x) << 28) & 0x30000000)
 #define MAS0_ESEL(x)		(((x) << 16) & 0x0FFF0000)
 #define MAS0_NV(x)		((x) & 0x00000FFF)
+#define MAS0_ESEL_MASK		0x0FFF0000
 #define MAS0_HES		0x00004000
 #define MAS0_WQ_ALLWAYS		0x00000000
 #define MAS0_WQ_COND		0x00001000
@@ -137,6 +138,21 @@
 #define MMUCSR0_TLB2PS	0x00078000	/* TLB2 Page Size */
 #define MMUCSR0_TLB3PS	0x00780000	/* TLB3 Page Size */
 
+/* MMUCFG bits */
+#define MMUCFG_MAVN_NASK	0x00000003
+#define MMUCFG_MAVN_V1_0	0x00000000
+#define MMUCFG_MAVN_V2_0	0x00000001
+#define MMUCFG_NTLB_MASK	0x0000000c
+#define MMUCFG_NTLB_SHIFT	2
+#define MMUCFG_PIDSIZE_MASK	0x000007c0
+#define MMUCFG_PIDSIZE_SHIFT	6
+#define MMUCFG_TWC		0x00008000
+#define MMUCFG_LRAT		0x00010000
+#define MMUCFG_RASIZE_MASK	0x00fe0000
+#define MMUCFG_RASIZE_SHIFT	17
+#define MMUCFG_LPIDSIZE_MASK	0x0f000000
+#define MMUCFG_LPIDSIZE_SHIFT	24
+
 /* TLBnCFG encoding */
 #define TLBnCFG_N_ENTRY		0x00000fff	/* number of entries */
 #define TLBnCFG_HES		0x00002000	/* HW select supported */
@@ -229,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_vmemmap_psize;
 
+#ifdef CONFIG_PPC64
+extern unsigned long linear_map_top;
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ae7b3efec8e5..d865bd909c7d 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
 typedef unsigned long mm_context_id_t;
+struct spinlock;
 
 typedef struct {
 	mm_context_id_t id;
@@ -423,6 +424,11 @@ typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
 } mm_context_t;
 
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb40a06d3b77..4138b21ae80a 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -56,11 +56,6 @@
  */
 #define MMU_FTR_NEED_DTLB_SW_LRU	ASM_CONST(0x00200000)
 
-/* This indicates that the processor uses the ISA 2.06 server tlbie
- * mnemonics
- */
-#define MMU_FTR_TLBIE_206		ASM_CONST(0x00400000)
-
 /* Enable use of TLB reservation.  Processor should support tlbsrx.
  * instruction and MAS0[WQ].
  */
@@ -70,6 +65,53 @@
  */
 #define MMU_FTR_USE_PAIRED_MAS		ASM_CONST(0x01000000)
 
+/* MMU is SLB-based
+ */
+#define MMU_FTR_SLB			ASM_CONST(0x02000000)
+
+/* Support 16M large pages
+ */
+#define MMU_FTR_16M_PAGE		ASM_CONST(0x04000000)
+
+/* Supports TLBIEL variant
+ */
+#define MMU_FTR_TLBIEL			ASM_CONST(0x08000000)
+
+/* Supports tlbies w/o locking
+ */
+#define MMU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x10000000)
+
+/* Large pages can be marked CI
+ */
+#define MMU_FTR_CI_LARGE_PAGE		ASM_CONST(0x20000000)
+
+/* 1T segments available
+ */
+#define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
+
+/* Doesn't support the B bit (1T segment) in SLBIE
+ */
+#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x80000000)
+
+/* MMU feature bit sets for various CPUs */
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
+	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
+#define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970		MMU_FTRS_POWER4
+#define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE
+#define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
+#define MMU_FTRS_A2		MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \
+				MMU_FTR_USE_TLBIVAX_BCAST | \
+				MMU_FTR_LOCK_BCAST_INVAL | \
+				MMU_FTR_USE_TLBRSRV | \
+				MMU_FTR_USE_PAIRED_MAS | \
+				MMU_FTR_TLBIEL | \
+				MMU_FTR_16M_PAGE
 #ifndef __ASSEMBLY__
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 81fb41289d6c..a73668a5f30d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
 #endif
 
+extern void switch_cop(struct mm_struct *next);
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
 /*
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
@@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (prev == next)
 		return;
 
+#ifdef CONFIG_PPC_ICSWX
+	/* Switch coprocessor context only if prev or next uses a coprocessor */
+	if (prev->context.acop || next->context.acop)
+		switch_cop(next);
+#endif /* CONFIG_PPC_ICSWX */
+
 	/* We must stop all altivec streams before changing the HW
 	 * context
 	 */
@@ -67,7 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures.
 	 */
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		switch_slb(tsk, next);
 	else
 		switch_stab(tsk, next);
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 7005ee0b074d..df18989e78d4 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -3,7 +3,6 @@
 #ifdef __KERNEL__
 
 #include <linux/irq.h>
-#include <linux/sysdev.h>
 #include <asm/dcr.h>
 #include <asm/msi_bitmap.h>
 
@@ -263,6 +262,7 @@ struct mpic
 #ifdef CONFIG_SMP
 	struct irq_chip		hc_ipi;
 #endif
+	struct irq_chip		hc_tm;
 	const char		*name;
 	/* Flags */
 	unsigned int		flags;
@@ -281,7 +281,7 @@ struct mpic
 
 	/* vector numbers used for internal sources (ipi/timers) */
 	unsigned int		ipi_vecs[4];
-	unsigned int		timer_vecs[4];
+	unsigned int		timer_vecs[8];
 
 	/* Spurious vector to program into unused sources */
 	unsigned int		spurious_vec;
@@ -320,8 +320,6 @@ struct mpic
 	/* link */
 	struct mpic		*next;
 
-	struct sys_device	sysdev;
-
 #ifdef CONFIG_PM
 	struct mpic_irq_save	*save_data;
 #endif
@@ -371,6 +369,8 @@ struct mpic
  * NOTE: This flag trumps MPIC_WANTS_RESET.
  */
 #define MPIC_NO_RESET			0x00004000
+/* Freescale MPIC (compatible includes "fsl,mpic") */
+#define MPIC_FSL			0x00008000
 
 /* MPIC HW modification ID */
 #define MPIC_REGSET_MASK		0xf0000000
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index d4b4bfa26fb3..89d2f99c1bf4 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -18,13 +18,18 @@
 extern int pSeries_reconfig_notifier_register(struct notifier_block *);
 extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
 extern struct blocking_notifier_head pSeries_reconfig_chain;
+/* Not the best place to put this, will be fixed when we move some
+ * of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
 #else /* !CONFIG_PPC_PSERIES */
 static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
 	return 0;
 }
 static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
+static inline void pSeries_coalesce_init(void) { }
 #endif /* CONFIG_PPC_PSERIES */
 
+
 #endif /* __KERNEL__ */
 #endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec57540cd7af..74126765106a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -92,9 +92,9 @@ struct paca_struct {
 	 * Now, starting in cacheline 2, the exception save areas
 	 */
 	/* used for most interrupts/exceptions */
-	u64 exgen[10] __attribute__((aligned(0x80)));
-	u64 exmc[10];		/* used for machine checks */
-	u64 exslb[10];		/* used for SLB/segment table misses
+	u64 exgen[11] __attribute__((aligned(0x80)));
+	u64 exmc[11];		/* used for machine checks */
+	u64 exslb[11];		/* used for SLB/segment table misses
  				 * on the linear mapping */
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
@@ -106,7 +106,8 @@ struct paca_struct {
 	pgd_t *pgd;			/* Current PGD */
 	pgd_t *kernel_pgd;		/* Kernel PGD */
 	u64 exgen[8] __attribute__((aligned(0x80)));
-	u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+	/* We can have up to 3 levels of reentrancy in the TLB miss handler */
+	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
 	u64 exmc[8];		/* used for machine checks */
 	u64 excrit[8];		/* used for crit interrupts */
 	u64 exdbg[8];		/* used for debug interrupts */
@@ -125,7 +126,7 @@ struct paca_struct {
 	struct task_struct *__current;	/* Pointer to current */
 	u64 kstack;			/* Saved Kernel stack addr */
 	u64 stab_rr;			/* stab/slb round-robin counter */
-	u64 saved_r1;			/* r1 save for RTAS calls */
+	u64 saved_r1;			/* r1 save for RTAS calls or PM */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
 	u8 soft_enabled;		/* irq soft-enable flag */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 812b2cd80aed..9356262fd3cc 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr)
 	: "ctr", "memory");
 }
 
-extern void copy_4K_page(void *to, void *from);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static inline void copy_page(void *to, void *from)
-{
-	unsigned int i;
-	for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
-		copy_4K_page(to, from);
-		to += 4096;
-		from += 4096;
-	}
-}
-#else /* CONFIG_PPC_64K_PAGES */
-static inline void copy_page(void *to, void *from)
-{
-	copy_4K_page(to, from);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
+extern void copy_page(void *to, void *from);
 
 /* Log 2 of page table size */
 extern u64 ppc64_pft_size;
@@ -130,7 +113,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
 extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long len, unsigned int psize);
 
-#define slice_mm_new_context(mm)	((mm)->context.id == 0)
+#define slice_mm_new_context(mm)	((mm)->context.id == MMU_NO_CONTEXT)
 
 #endif /* __ASSEMBLY__ */
 #else
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 2b09cd522d33..81576ee0cfb1 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	unsigned long old;
 
-       	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-       		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	unsigned long old;
-
 	if ((pte_val(*ptep) & _PAGE_RW) == 0)
 		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 1);
 }
 
 /*
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1255569387b6..e472659d906c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -41,6 +41,10 @@
 #define PPC_INST_RFCI			0x4c000066
 #define PPC_INST_RFDI			0x4c00004e
 #define PPC_INST_RFMCI			0x4c00004c
+#define PPC_INST_MFSPR_DSCR		0x7c1102a6
+#define PPC_INST_MFSPR_DSCR_MASK	0xfc1fffff
+#define PPC_INST_MTSPR_DSCR		0x7c1103a6
+#define PPC_INST_MTSPR_DSCR_MASK	0xfc1fffff
 
 #define PPC_INST_STRING			0x7c00042a
 #define PPC_INST_STRING_MASK		0xfc0007fe
@@ -56,6 +60,17 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
 
+#define PPC_INST_NAP			0x4c000364
+#define PPC_INST_SLEEP			0x4c0003a4
+
+/* A2 specific instructions */
+#define PPC_INST_ERATWE			0x7c0001a6
+#define PPC_INST_ERATRE			0x7c000166
+#define PPC_INST_ERATILX		0x7c000066
+#define PPC_INST_ERATIVAX		0x7c000666
+#define PPC_INST_ERATSX			0x7c000126
+#define PPC_INST_ERATSX_DOT		0x7c000127
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -67,6 +82,8 @@
 #define __PPC_XT(s)	__PPC_XS(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
+#define __PPC_WS(w)	(((w) & 0x1f) << 11)
+
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
  * larx with EH set as an illegal instruction.
@@ -113,6 +130,21 @@
 #define PPC_TLBIVAX(a,b)	stringify_in_c(.long PPC_INST_TLBIVAX | \
 					__PPC_RA(a) | __PPC_RB(b))
 
+#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_INST_ERATWE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_INST_ERATRE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_INST_ERATILX | \
+					__PPC_T_TLB(t) | __PPC_RA(a) | \
+					__PPC_RB(b))
+#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_INST_ERATIVAX | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(w))
+#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX_DOT | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(w))
+
+
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
@@ -126,4 +158,7 @@
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), (a), (b)))
 
+#define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
+#define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 98210067c1cc..1b422381fc16 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define HMT_MEDIUM	or	2,2,2
 #define HMT_MEDIUM_HIGH or	5,5,5		# medium high priority
 #define HMT_HIGH	or	3,3,3
+#define HMT_EXTRA_HIGH	or	7,7,7		# power7 only
 
 #ifdef __KERNEL__
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index de1967a1ff57..d50c2b6d9bc3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -238,6 +238,10 @@ struct thread_struct {
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	void*		kvm_shadow_vcpu; /* KVM internal data */
 #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#ifdef CONFIG_PPC64
+	unsigned long	dscr;
+	int		dscr_inherit;
+#endif
 };
 
 #define ARCH_MIN_TASKALIGN 16
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 811f04ac3660..8d1569c29042 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -162,7 +162,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
  * on platforms where such control is possible.
  */
 #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES)
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
 #define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
 #else
 #define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e4abebe76c0..c5cae0dd176c 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -99,17 +99,23 @@
 #define MSR_LE		__MASK(MSR_LE_LG)	/* Little Endian */
 
 #if defined(CONFIG_PPC_BOOK3S_64)
+#define MSR_64BIT	MSR_SF
+
 /* Server variant */
 #define MSR_		MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
-#define MSR_KERNEL      MSR_ | MSR_SF
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE
-#define MSR_USER64	MSR_USER32 | MSR_SF
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
 /* Default MSR for kernel mode. */
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
+#ifndef MSR_64BIT
+#define MSR_64BIT	0
+#endif
+
 /* Floating Point Status and Control Register (FPSCR) Fields */
 #define FPSCR_FX	0x80000000	/* FPU exception summary */
 #define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
@@ -182,6 +188,8 @@
 
 #define SPRN_CTR	0x009	/* Count Register */
 #define SPRN_DSCR	0x11
+#define SPRN_CFAR	0x1c	/* Come From Address Register */
+#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
 #define   CTRL_CT	0xc0000000	/* current thread */
@@ -210,8 +218,43 @@
 #define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
 #define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
 #define SPRN_SPURR	0x134	/* Scaled PURR */
+#define SPRN_HSPRG0	0x130	/* Hypervisor Scratch 0 */
+#define SPRN_HSPRG1	0x131	/* Hypervisor Scratch 1 */
+#define SPRN_HDSISR     0x132
+#define SPRN_HDAR       0x133
+#define SPRN_HDEC	0x136	/* Hypervisor Decrementer */
 #define SPRN_HIOR	0x137	/* 970 Hypervisor interrupt offset */
+#define SPRN_RMOR	0x138	/* Real mode offset register */
+#define SPRN_HRMOR	0x139	/* Hypervisor real mode offset register */
+#define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
+#define   LPCR_VPM0	(1ul << (63-0))
+#define   LPCR_VPM1	(1ul << (63-1))
+#define   LPCR_ISL	(1ul << (63-2))
+#define   LPCR_DPFD_SH	(63-11)
+#define   LPCR_VRMA_L	(1ul << (63-12))
+#define   LPCR_VRMA_LP0	(1ul << (63-15))
+#define   LPCR_VRMA_LP1	(1ul << (63-16))
+#define   LPCR_RMLS    0x1C000000      /* impl dependent rmo limit sel */
+#define   LPCR_ILE     0x02000000      /* !HV irqs set MSR:LE */
+#define   LPCR_PECE	0x00007000	/* powersave exit cause enable */
+#define     LPCR_PECE0	0x00004000	/* ext. exceptions can cause exit */
+#define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
+#define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
+#define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
+#define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
+#define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
+#define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
+#define SPRN_LPID	0x13F	/* Logical Partition Identifier */
+#define	SPRN_HMER	0x150	/* Hypervisor maintenance exception */
+#define	SPRN_HMEER	0x151	/* Hypervisor maintenance exception enable */
+#define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
+#define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
+#define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
+#define SPRN_TLBRPNR	0x156	/* P7 TLB control register */
+#define SPRN_TLBLPIDR	0x157	/* P7 TLB control register */
 #define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
 #define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
 #define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
@@ -434,16 +477,23 @@
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
-#define   SRR1_WAKERESET	0x00380000 /* System reset */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
+#define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define	  SRR1_WAKERESET	0x00100000 /* System reset */
+#define	  SRR1_WAKESTATE	0x00030000 /* Powersave exit mask [46:47] */
+#define	  SRR1_WS_DEEPEST	0x00030000 /* Some resources not maintained,
+					  * may not be recoverable */
+#define	  SRR1_WS_DEEPER	0x00020000 /* Some resources not maintained */
+#define	  SRR1_WS_DEEP		0x00010000 /* All resources maintained */
 #define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
 #define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
 #define   SRR1_PROGTRAP		0x00020000 /* Trap */
 #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
+
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 
@@ -673,12 +723,15 @@
  * SPRG usage:
  *
  * All 64-bit:
- *	- SPRG1 stores PACA pointer
+ *	- SPRG1 stores PACA pointer except 64-bit server in
+ *        HV mode in which case it is HSPRG0
  *
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
  *	- SPRG2 scratch for exception vectors
  *	- SPRG3 unused (user visible)
+ *      - HSPRG0 stores PACA in HV mode
+ *      - HSPRG1 scratch for "HV" exceptions
  *
  * 64-bit embedded
  *	- SPRG0 generic exception scratch
@@ -741,6 +794,41 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
+#define SPRN_SPRG_HPACA		SPRN_HSPRG0
+#define SPRN_SPRG_HSCRATCH0	SPRN_HSPRG1
+
+#define GET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_PACA;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HPACA;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_PACA,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HPACA,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define GET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -750,6 +838,10 @@
 #define SPRN_SPRG_TLB_EXFRAME	SPRN_SPRG2
 #define SPRN_SPRG_TLB_SCRATCH	SPRN_SPRG6
 #define SPRN_SPRG_GEN_SCRATCH	SPRN_SPRG0
+
+#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
+#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -800,6 +892,8 @@
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
 #endif
 
+
+
 /*
  * An mtfsf instruction with the L bit set. On CPUs that support this a
  * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored.
@@ -894,6 +988,8 @@
 #define PV_POWER5p	0x003B
 #define PV_POWER7	0x003F
 #define PV_970FX	0x003C
+#define PV_POWER6	0x003E
+#define PV_POWER7	0x003F
 #define PV_630		0x0040
 #define PV_630p	0x0041
 #define PV_970MP	0x0044
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
new file mode 100644
index 000000000000..3d52a1132f3d
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -0,0 +1,165 @@
+/*
+ *  Register definitions specific to the A2 core
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_POWERPC_REG_A2_H__
+#define __ASM_POWERPC_REG_A2_H__
+
+#define SPRN_TENSR	0x1b5
+#define SPRN_TENS	0x1b6	/* Thread ENable Set */
+#define SPRN_TENC	0x1b7	/* Thread ENable Clear */
+
+#define SPRN_A2_CCR0	0x3f0	/* Core Configuration Register 0 */
+#define SPRN_A2_CCR1	0x3f1	/* Core Configuration Register 1 */
+#define SPRN_A2_CCR2	0x3f2	/* Core Configuration Register 2 */
+#define SPRN_MMUCR0	0x3fc	/* MMU Control Register 0 */
+#define SPRN_MMUCR1	0x3fd	/* MMU Control Register 1 */
+#define SPRN_MMUCR2	0x3fe	/* MMU Control Register 2 */
+#define SPRN_MMUCR3	0x3ff	/* MMU Control Register 3 */
+
+#define SPRN_IAR	0x372
+
+#define SPRN_IUCR0	0x3f3
+#define IUCR0_ICBI_ACK	0x1000
+
+#define SPRN_XUCR0	0x3f6	/* Execution Unit Config Register 0 */
+
+#define A2_IERAT_SIZE	16
+#define A2_DERAT_SIZE	32
+
+/* A2 MMUCR0 bits */
+#define MMUCR0_ECL	0x80000000	/* Extended Class for TLB fills */
+#define MMUCR0_TID_NZ	0x40000000	/* TID is non-zero */
+#define MMUCR0_TS	0x10000000	/* Translation space for TLB fills */
+#define MMUCR0_TGS	0x20000000	/* Guest space for TLB fills */
+#define MMUCR0_TLBSEL	0x0c000000	/* TLB or ERAT target for TLB fills */
+#define MMUCR0_TLBSEL_U	0x00000000	/*  TLBSEL = UTLB */
+#define MMUCR0_TLBSEL_I	0x08000000	/*  TLBSEL = I-ERAT */
+#define MMUCR0_TLBSEL_D	0x0c000000	/*  TLBSEL = D-ERAT */
+#define MMUCR0_LOCKSRSH	0x02000000	/* Use TLB lock on tlbsx. */
+#define MMUCR0_TID_MASK	0x000000ff	/* TID field */
+
+/* A2 MMUCR1 bits */
+#define MMUCR1_IRRE		0x80000000	/* I-ERAT round robin enable */
+#define MMUCR1_DRRE		0x40000000	/* D-ERAT round robin enable */
+#define MMUCR1_REE		0x20000000	/* Reference Exception Enable*/
+#define MMUCR1_CEE		0x10000000	/* Change exception enable */
+#define MMUCR1_CSINV_ALL	0x00000000	/* Inval ERAT on all CS evts */
+#define MMUCR1_CSINV_NISYNC	0x04000000	/* Inval ERAT on all ex isync*/
+#define MMUCR1_CSINV_NEVER	0x0c000000	/* Don't inval ERAT on CS */
+#define MMUCR1_ICTID		0x00080000	/* IERAT class field as TID */
+#define MMUCR1_ITTID		0x00040000	/* IERAT thdid field as TID */
+#define MMUCR1_DCTID		0x00020000	/* DERAT class field as TID */
+#define MMUCR1_DTTID		0x00010000	/* DERAT thdid field as TID */
+#define MMUCR1_DCCD		0x00008000	/* DERAT class ignore */
+#define MMUCR1_TLBWE_BINV	0x00004000	/* back invalidate on tlbwe */
+
+/* A2 MMUCR2 bits */
+#define MMUCR2_PSSEL_SHIFT	4
+
+/* A2 MMUCR3 bits */
+#define MMUCR3_THID		0x0000000f	/* Thread ID */
+
+/* *** ERAT TLB bits definitions */
+#define TLB0_EPN_MASK		ASM_CONST(0xfffffffffffff000)
+#define TLB0_CLASS_MASK		ASM_CONST(0x0000000000000c00)
+#define TLB0_CLASS_00		ASM_CONST(0x0000000000000000)
+#define TLB0_CLASS_01		ASM_CONST(0x0000000000000400)
+#define TLB0_CLASS_10		ASM_CONST(0x0000000000000800)
+#define TLB0_CLASS_11		ASM_CONST(0x0000000000000c00)
+#define TLB0_V			ASM_CONST(0x0000000000000200)
+#define TLB0_X			ASM_CONST(0x0000000000000100)
+#define TLB0_SIZE_MASK		ASM_CONST(0x00000000000000f0)
+#define TLB0_SIZE_4K		ASM_CONST(0x0000000000000010)
+#define TLB0_SIZE_64K		ASM_CONST(0x0000000000000030)
+#define TLB0_SIZE_1M		ASM_CONST(0x0000000000000050)
+#define TLB0_SIZE_16M		ASM_CONST(0x0000000000000070)
+#define TLB0_SIZE_1G		ASM_CONST(0x00000000000000a0)
+#define TLB0_THDID_MASK		ASM_CONST(0x000000000000000f)
+#define TLB0_THDID_0		ASM_CONST(0x0000000000000001)
+#define TLB0_THDID_1		ASM_CONST(0x0000000000000002)
+#define TLB0_THDID_2		ASM_CONST(0x0000000000000004)
+#define TLB0_THDID_3		ASM_CONST(0x0000000000000008)
+#define TLB0_THDID_ALL		ASM_CONST(0x000000000000000f)
+
+#define TLB1_RESVATTR		ASM_CONST(0x00f0000000000000)
+#define TLB1_U0			ASM_CONST(0x0008000000000000)
+#define TLB1_U1			ASM_CONST(0x0004000000000000)
+#define TLB1_U2			ASM_CONST(0x0002000000000000)
+#define TLB1_U3			ASM_CONST(0x0001000000000000)
+#define TLB1_R			ASM_CONST(0x0000800000000000)
+#define TLB1_C			ASM_CONST(0x0000400000000000)
+#define TLB1_RPN_MASK		ASM_CONST(0x000003fffffff000)
+#define TLB1_W			ASM_CONST(0x0000000000000800)
+#define TLB1_I			ASM_CONST(0x0000000000000400)
+#define TLB1_M			ASM_CONST(0x0000000000000200)
+#define TLB1_G			ASM_CONST(0x0000000000000100)
+#define TLB1_E			ASM_CONST(0x0000000000000080)
+#define TLB1_VF			ASM_CONST(0x0000000000000040)
+#define TLB1_UX			ASM_CONST(0x0000000000000020)
+#define TLB1_SX			ASM_CONST(0x0000000000000010)
+#define TLB1_UW			ASM_CONST(0x0000000000000008)
+#define TLB1_SW			ASM_CONST(0x0000000000000004)
+#define TLB1_UR			ASM_CONST(0x0000000000000002)
+#define TLB1_SR			ASM_CONST(0x0000000000000001)
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+#define WSP_UART_PHYS	0xffc000c000
+/* This needs to be carefully chosen to hit a !0 congruence class
+ * in the TLB since we bolt it in way 3, which is already occupied
+ * by our linear mapping primary bolted entry in CC 0.
+ */
+#define WSP_UART_VIRT	0xf000000000001000
+#endif
+
+/* A2 erativax attributes definitions */
+#define ERATIVAX_RS_IS_ALL		0x000
+#define ERATIVAX_RS_IS_TID		0x040
+#define ERATIVAX_RS_IS_CLASS		0x080
+#define ERATIVAX_RS_IS_FULLMATCH	0x0c0
+#define ERATIVAX_CLASS_00		0x000
+#define ERATIVAX_CLASS_01		0x010
+#define ERATIVAX_CLASS_10		0x020
+#define ERATIVAX_CLASS_11		0x030
+#define ERATIVAX_PSIZE_4K		(TLB_PSIZE_4K >> 1)
+#define ERATIVAX_PSIZE_64K		(TLB_PSIZE_64K >> 1)
+#define ERATIVAX_PSIZE_1M		(TLB_PSIZE_1M >> 1)
+#define ERATIVAX_PSIZE_16M		(TLB_PSIZE_16M >> 1)
+#define ERATIVAX_PSIZE_1G		(TLB_PSIZE_1G >> 1)
+
+/* A2 eratilx attributes definitions */
+#define ERATILX_T_ALL			0
+#define ERATILX_T_TID			1
+#define ERATILX_T_TGS			2
+#define ERATILX_T_FULLMATCH		3
+#define ERATILX_T_CLASS0		4
+#define ERATILX_T_CLASS1		5
+#define ERATILX_T_CLASS2		6
+#define ERATILX_T_CLASS3		7
+
+/* XUCR0 bits */
+#define XUCR0_TRACE_UM_T0		0x40000000	/* Thread 0 */
+#define XUCR0_TRACE_UM_T1		0x20000000	/* Thread 1 */
+#define XUCR0_TRACE_UM_T2		0x10000000	/* Thread 2 */
+#define XUCR0_TRACE_UM_T3		0x08000000	/* Thread 3 */
+
+/* A2 CCR0 register */
+#define A2_CCR0_PME_DISABLED		0x00000000
+#define A2_CCR0_PME_SLEEP		0x40000000
+#define A2_CCR0_PME_RVW			0x80000000
+#define A2_CCR0_PME_DISABLED2		0xc0000000
+
+/* A2 CCR2 register */
+#define A2_CCR2_ERAT_ONLY_MODE		0x00000001
+#define A2_CCR2_ENABLE_ICSWX		0x00000002
+#define A2_CCR2_ENABLE_PC		0x20000000
+#define A2_CCR2_ENABLE_TRACE		0x40000000
+
+#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index b316794aa2b5..0f0ad9fa01c1 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -27,10 +27,12 @@
 #define MSR_CM		(1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
 
 #if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_64BIT	MSR_CM
+
 #define MSR_		MSR_ME | MSR_CE
-#define MSR_KERNEL      MSR_ | MSR_CM
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE | MSR_DE
-#define MSR_USER64	MSR_USER32 | MSR_CM | MSR_DE
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined (CONFIG_40x)
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
@@ -81,6 +83,10 @@
 #define SPRN_IVOR13	0x19D	/* Interrupt Vector Offset Register 13 */
 #define SPRN_IVOR14	0x19E	/* Interrupt Vector Offset Register 14 */
 #define SPRN_IVOR15	0x19F	/* Interrupt Vector Offset Register 15 */
+#define SPRN_IVOR38	0x1B0	/* Interrupt Vector Offset Register 38 */
+#define SPRN_IVOR39	0x1B1	/* Interrupt Vector Offset Register 39 */
+#define SPRN_IVOR40	0x1B2	/* Interrupt Vector Offset Register 40 */
+#define SPRN_IVOR41	0x1B3	/* Interrupt Vector Offset Register 41 */
 #define SPRN_SPEFSCR	0x200	/* SPE & Embedded FP Status & Control */
 #define SPRN_BBEAR	0x201	/* Branch Buffer Entry Address Register */
 #define SPRN_BBTAR	0x202	/* Branch Buffer Target Address Register */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9a1193e30f26..58625d1e7802 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -158,7 +158,50 @@ struct rtas_error_log {
 	unsigned long target:4;			/* Target of failed operation */
 	unsigned long type:8;			/* General event or error*/
 	unsigned long extended_log_length:32;	/* length in bytes */
-	unsigned char buffer[1];
+	unsigned char buffer[1];		/* Start of extended log */
+						/* Variable length.      */
+};
+
+#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG	14
+
+#define RTAS_V6EXT_COMPANY_ID_IBM	(('I' << 24) | ('B' << 16) | ('M' << 8))
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+	/* Byte 0 */
+	uint32_t log_valid:1;		/* 1:Log valid */
+	uint32_t unrecoverable_error:1;	/* 1:Unrecoverable error */
+	uint32_t recoverable_error:1;	/* 1:recoverable (correctable	*/
+					/*   or successfully retried)	*/
+	uint32_t degraded_operation:1;	/* 1:Unrecoverable err, bypassed*/
+					/*   - degraded operation (e.g.	*/
+					/*   CPU or mem taken off-line)	*/
+	uint32_t predictive_error:1;
+	uint32_t new_log:1;		/* 1:"New" log (Always 1 for	*/
+					/*   data returned from RTAS)	*/
+	uint32_t big_endian:1;		/* 1: Big endian */
+	uint32_t :1;			/* reserved */
+	/* Byte 1 */
+	uint32_t :8;			/* reserved */
+	/* Byte 2 */
+	uint32_t powerpc_format:1;	/* Set to 1 (indicating log is	*/
+					/* in PowerPC format)		*/
+	uint32_t :3;			/* reserved */
+	uint32_t log_format:4;		/* Log format indicator. Define	*/
+					/* format used for byte 12-2047	*/
+	/* Byte 3 */
+	uint32_t :8;			/* reserved */
+	/* Byte 4-11 */
+	uint8_t reserved[8];		/* reserved */
+	/* Byte 12-15 */
+	uint32_t company_id;		/* Company ID of the company	*/
+					/* that defines the format for	*/
+					/* the vendor specific log type	*/
+	/* Byte 16-end of log */
+	uint8_t vendor_log[1];		/* Start of vendor specific log	*/
+					/* Variable length.		*/
 };
 
 /*
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
new file mode 100644
index 000000000000..0cabfd7bc2d1
--- /dev/null
+++ b/arch/powerpc/include/asm/scom.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_POWERPC_SCOM_H
+#define _ASM_POWERPC_SCOM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_SCOM
+
+/*
+ * The SCOM bus is a sideband bus used for accessing various internal
+ * registers of the processor or the chipset. The implementation details
+ * differ between processors and platforms, and the access method as
+ * well.
+ *
+ * This API allows to "map" ranges of SCOM register numbers associated
+ * with a given SCOM controller. The latter must be represented by a
+ * device node, though some implementations might support NULL if there
+ * is no possible ambiguity
+ *
+ * Then, scom_read/scom_write can be used to access registers inside
+ * that range. The argument passed is a register number relative to
+ * the beginning of the range mapped.
+ */
+
+typedef void *scom_map_t;
+
+/* Value for an invalid SCOM map */
+#define SCOM_MAP_INVALID	(NULL)
+
+/* The scom_controller data structure is what the platform passes
+ * to the core code in scom_init, it provides the actual implementation
+ * of all the SCOM functions
+ */
+struct scom_controller {
+	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
+	void (*unmap)(scom_map_t map);
+
+	u64 (*read)(scom_map_t map, u32 reg);
+	void (*write)(scom_map_t map, u32 reg, u64 value);
+};
+
+extern const struct scom_controller *scom_controller;
+
+/**
+ * scom_init - Initialize the SCOM backend, called by the platform
+ * @controller: The platform SCOM controller
+ */
+static inline void scom_init(const struct scom_controller *controller)
+{
+	scom_controller = controller;
+}
+
+/**
+ * scom_map_ok - Test if a SCOM mapping is successful
+ * @map: The result of scom_map to test
+ */
+static inline int scom_map_ok(scom_map_t map)
+{
+	return map != SCOM_MAP_INVALID;
+}
+
+/**
+ * scom_map - Map a block of SCOM registers
+ * @ctrl_dev: Device node of the SCOM controller
+ *            some implementations allow NULL here
+ * @reg: first SCOM register to map
+ * @count: Number of SCOM registers to map
+ */
+
+static inline scom_map_t scom_map(struct device_node *ctrl_dev,
+				  u64 reg, u64 count)
+{
+	return scom_controller->map(ctrl_dev, reg, count);
+}
+
+/**
+ * scom_find_parent - Find the SCOM controller for a device
+ * @dev: OF node of the device
+ *
+ * This is not meant for general usage, but in combination with
+ * scom_map() allows to map registers not represented by the
+ * device's own scom-reg property. Useful for applying HW workarounds
+ * on things not properly represented in the device-tree for example.
+ */
+struct device_node *scom_find_parent(struct device_node *dev);
+
+
+/**
+ * scom_map_device - Map a device's block of SCOM registers
+ * @dev: OF node of the device
+ * @index: Register bank index (index in "scom-reg" property)
+ *
+ * This function will use the device-tree binding for SCOM which
+ * is to follow "scom-parent" properties until it finds a node with
+ * a "scom-controller" property to find the controller. It will then
+ * use the "scom-reg" property which is made of reg/count pairs,
+ * each of them having a size defined by the controller's #scom-cells
+ * property
+ */
+extern scom_map_t scom_map_device(struct device_node *dev, int index);
+
+
+/**
+ * scom_unmap - Unmap a block of SCOM registers
+ * @map: Result of scom_map to be unmapped
+ */
+static inline void scom_unmap(scom_map_t map)
+{
+	if (scom_map_ok(map))
+		scom_controller->unmap(map);
+}
+
+/**
+ * scom_read - Read a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ */
+static inline u64 scom_read(scom_map_t map, u32 reg)
+{
+	return scom_controller->read(map, reg);
+}
+
+/**
+ * scom_write - Write to a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ * @value: Value to write
+ */
+static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	scom_controller->write(map, reg, value);
+}
+
+#endif /* CONFIG_PPC_SCOM */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index a902a0d3ae0d..880b8c1e6e53 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -20,6 +20,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/kernel.h>
+#include <linux/irqreturn.h>
 
 #ifndef __ASSEMBLY__
 
@@ -29,14 +30,32 @@
 #include <asm/percpu.h>
 
 extern int boot_cpuid;
+extern int boot_cpu_count;
 
 extern void cpu_die(void);
 
 #ifdef CONFIG_SMP
 
-extern void smp_send_debugger_break(int cpu);
-extern void smp_message_recv(int);
+struct smp_ops_t {
+	void  (*message_pass)(int cpu, int msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+	void  (*cause_ipi)(int cpu, unsigned long data);
+#endif
+	int   (*probe)(void);
+	int   (*kick_cpu)(int nr);
+	void  (*setup_cpu)(int nr);
+	void  (*bringup_done)(void);
+	void  (*take_timebase)(void);
+	void  (*give_timebase)(void);
+	int   (*cpu_disable)(void);
+	void  (*cpu_die)(unsigned int nr);
+	int   (*cpu_bootable)(unsigned int nr);
+};
+
+extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
+extern void __devinit smp_generic_give_timebase(void);
+extern void __devinit smp_generic_take_timebase(void);
 
 DECLARE_PER_CPU(unsigned int, cpu_pvr);
 
@@ -93,13 +112,16 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_CALL_FUNC_SINGLE	2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
-/*
- * irq controllers that have dedicated ipis per message and don't
- * need additional code in the action handler may use this
- */
+/* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
 
+/* for irq controllers with only a single ipi */
+extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
+extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_resend(void);
+extern irqreturn_t smp_ipi_demux(void);
+
 void smp_init_iSeries(void);
 void smp_init_pSeries(void);
 void smp_init_cell(void);
@@ -149,7 +171,7 @@ extern int smt_enabled_at_boot;
 
 extern int smp_mpic_probe(void);
 extern void smp_mpic_setup_cpu(int cpu);
-extern void smp_generic_kick_cpu(int nr);
+extern int smp_generic_kick_cpu(int nr);
 
 extern void smp_generic_give_timebase(void);
 extern void smp_generic_take_timebase(void);
@@ -169,6 +191,8 @@ extern unsigned long __secondary_hold_spinloop;
 extern unsigned long __secondary_hold_acknowledge;
 extern char __secondary_hold;
 
+extern irqreturn_t debug_ipi_action(int irq, void *data);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 5e474ddd2273..2dc595dda03b 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -219,8 +219,6 @@ extern int mem_init_done;	/* set on boot once kmalloc can be called */
 extern int init_bootmem_done;	/* set once bootmem is available */
 extern phys_addr_t memory_limit;
 extern unsigned long klimit;
-
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index d50a380b2b6f..81143fcbd113 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 
 #elif defined(CONFIG_PPC_STD_MMU_64)
 
+#define MMU_NO_CONTEXT		0
+
 /*
  * TLB flushing for 64-bit hash-MMU CPUs
  */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 11ae699135ba..58580e94a2bb 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void);
 extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
+extern void __init udbg_init_wsp(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index ae9c899c8a6d..d12b11d7641e 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -60,7 +60,7 @@
  *
  * Obviously, the GART is not cache coherent and so any change to it
  * must be flushed to memory (or maybe just make the GART space non
- * cachable). AGP memory itself does't seem to be cache coherent neither.
+ * cachable). AGP memory itself doesn't seem to be cache coherent neither.
  *
  * In order to invalidate the GART (which is probably necessary to inval
  * the bridge internal TLBs), the following sequence has to be written,
diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
new file mode 100644
index 000000000000..c7dc83088a33
--- /dev/null
+++ b/arch/powerpc/include/asm/wsp.h
@@ -0,0 +1,14 @@
+/*
+ *  Copyright 2011 Michael Ellerman, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#ifndef __ASM_POWERPC_WSP_H
+#define __ASM_POWERPC_WSP_H
+
+extern int wsp_get_chip_id(struct device_node *dn);
+
+#endif /* __ASM_POWERPC_WSP_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
new file mode 100644
index 000000000000..b183a4062011
--- /dev/null
+++ b/arch/powerpc/include/asm/xics.h
@@ -0,0 +1,142 @@
+/*
+ * Common definitions across all variants of ICP and ICS interrupt
+ * controllers.
+ */
+
+#ifndef _XICS_H
+#define _XICS_H
+
+#include <linux/interrupt.h>
+
+#define XICS_IPI		2
+#define XICS_IRQ_SPURIOUS	0
+
+/* Want a priority other than 0.  Various HW issues require this. */
+#define	DEFAULT_PRIORITY	5
+
+/*
+ * Mark IPIs as higher priority so we can take them inside interrupts that
+ * aren't marked IRQF_DISABLED
+ */
+#define IPI_PRIORITY		4
+
+/* The least favored priority */
+#define LOWEST_PRIORITY		0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES	3
+
+/* Native ICP */
+extern int icp_native_init(void);
+
+/* PAPR ICP */
+extern int icp_hv_init(void);
+
+/* ICP ops */
+struct icp_ops {
+	unsigned int (*get_irq)(void);
+	void (*eoi)(struct irq_data *d);
+	void (*set_priority)(unsigned char prio);
+	void (*teardown_cpu)(void);
+	void (*flush_ipi)(void);
+#ifdef CONFIG_SMP
+	void (*cause_ipi)(int cpu, unsigned long data);
+	irq_handler_t ipi_action;
+#endif
+};
+
+extern const struct icp_ops *icp_ops;
+
+/* Native ICS */
+extern int ics_native_init(void);
+
+/* RTAS ICS */
+extern int ics_rtas_init(void);
+
+/* ICS instance, hooked up to chip_data of an irq */
+struct ics {
+	struct list_head link;
+	int (*map)(struct ics *ics, unsigned int virq);
+	void (*mask_unknown)(struct ics *ics, unsigned long vec);
+	long (*get_server)(struct ics *ics, unsigned long vec);
+	int (*host_match)(struct ics *ics, struct device_node *node);
+	char data[];
+};
+
+/* Commons */
+extern unsigned int xics_default_server;
+extern unsigned int xics_default_distrib_server;
+extern unsigned int xics_interrupt_server_size;
+extern struct irq_host *xics_host;
+
+struct xics_cppr {
+	unsigned char stack[MAX_NUM_PRIORITIES];
+	int index;
+};
+
+DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
+
+static inline void xics_push_cppr(unsigned int vec)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
+		return;
+
+	if (vec == XICS_IPI)
+		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
+	else
+		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
+}
+
+static inline unsigned char xics_pop_cppr(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index < 1))
+		return LOWEST_PRIORITY;
+
+	return os_cppr->stack[--os_cppr->index];
+}
+
+static inline void xics_set_base_cppr(unsigned char cppr)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/* we only really want to set the priority when there's
+	 * just one cppr value on the stack
+	 */
+	WARN_ON(os_cppr->index != 0);
+
+	os_cppr->stack[0] = cppr;
+}
+
+static inline unsigned char xics_cppr_top(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+	
+	return os_cppr->stack[os_cppr->index];
+}
+
+DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+extern void xics_init(void);
+extern void xics_setup_cpu(void);
+extern void xics_update_irq_servers(void);
+extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
+extern void xics_mask_unknown_vec(unsigned int vec);
+extern irqreturn_t xics_ipi_dispatch(int cpu);
+extern int xics_smp_probe(void);
+extern void xics_register_ics(struct ics *ics);
+extern void xics_teardown_cpu(void);
+extern void xics_kexec_teardown_cpu(int secondary);
+extern void xics_migrate_irqs_away(void);
+#ifdef CONFIG_SMP
+extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			       unsigned int strict_check);
+#else
+#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server)
+#endif
+
+
+#endif /* _XICS_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3bb2a3e6a337..9aab36312572 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,11 +38,14 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power7.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
+obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
 obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK)		+= clock.o
 procfs-y			:= proc_powerpc.o
@@ -75,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E)	+= cpu_setup_fsl_booke.o dbell.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= dbell.o
 
 extra-y				:= head_$(CONFIG_WORD_SIZE).o
-extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o
 extra-$(CONFIG_40x)		:= head_40x.o
 extra-$(CONFIG_44x)		:= head_44x.o
 extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
@@ -103,6 +105,8 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o crash.o \
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
+obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
+
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 23e6a93145ab..6887661ac072 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -74,6 +74,7 @@ int main(void)
 	DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
+	DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
 #else
 	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
new file mode 100644
index 000000000000..7f818feaa7a5
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -0,0 +1,114 @@
+/*
+ *  A2 specific assembly support code
+ *
+ *  Copyright 2009 Ben Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/reg_a2.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+
+/*
+ * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
+ * This also prevents external LPID accesses but that isn't a problem when not a
+ * guest. Under PV, this setting will be ignored and MMUCR will return the right
+ * number of PID bits we can use.
+ */
+#define MMUCR1_EXTEND_PID \
+	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
+	 MMUCR1_DTTID | MMUCR1_DCCD)
+
+/*
+ * Use extended PIDs if enabled.
+ * Don't clear the ERATs on context sync events and enable I & D LRU.
+ * Enable ERAT back invalidate when tlbwe overwrites an entry.
+ */
+#define INITIAL_MMUCR1 \
+	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
+	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
+
+_GLOBAL(__setup_cpu_a2)
+	/* Some of these are actually thread local and some are
+	 * core local but doing it always won't hurt
+	 */
+
+#ifdef CONFIG_PPC_WSP_COPRO
+	/* Make sure ACOP starts out as zero */
+	li	r3,0
+	mtspr   SPRN_ACOP,r3
+
+	/* Enable icswx instruction */
+	mfspr   r3,SPRN_A2_CCR2
+	ori     r3,r3,A2_CCR2_ENABLE_ICSWX
+	mtspr   SPRN_A2_CCR2,r3
+
+	/* Unmask all CTs in HACOP */
+	li      r3,-1
+	mtspr   SPRN_HACOP,r3
+#endif /* CONFIG_PPC_WSP_COPRO */
+
+	/* Enable doorbell */
+	mfspr   r3,SPRN_A2_CCR2
+	oris     r3,r3,A2_CCR2_ENABLE_PC@h
+	mtspr   SPRN_A2_CCR2,r3
+	isync
+
+	/* Setup CCR0 to disable power saving for now as it's busted
+	 * in the current implementations. Setup CCR1 to wake on
+	 * interrupts normally (we write the default value but who
+	 * knows what FW may have clobbered...)
+	 */
+	li	r3,0
+	mtspr	SPRN_A2_CCR0, r3
+	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
+	mtspr	SPRN_A2_CCR1, r3
+
+	/* Initialise MMUCR1 */
+	lis	r3,INITIAL_MMUCR1@h
+	ori	r3,r3,INITIAL_MMUCR1@l
+	mtspr	SPRN_MMUCR1,r3
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	LOAD_REG_IMMEDIATE(r3, 0x000a7531)
+	mtspr	SPRN_MMUCR2,r3
+
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	LOAD_REG_IMMEDIATE(r3, 0x0000000f)
+	mtspr	SPRN_MMUCR3,r3
+
+	/* Don't do ERAT stuff if running guest mode */
+	mfmsr	r3
+	andis.	r0,r3,MSR_GS@h
+	bne	1f
+
+	/* Now set the I-ERAT watermark to 15 */
+	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_IERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* Now set the D-ERAT watermark to 31 */
+	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_DERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* And invalidate the beast just in case. That won't get rid of
+	 * a bolted entry though it will be in LRU and so will go away eventually
+	 * but let's not bother for now
+	 */
+	PPC_ERATILX(0,0,0)
+1:
+	blr
+
+_GLOBAL(__restore_cpu_a2)
+	b	__setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 913611105c1f..8053db02b85e 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500)
 	bl	__e500_dcache_setup
 #ifdef CONFIG_PPC_BOOK3E_64
 	bl	.__setup_base_ivors
+	bl	.setup_perfmon_ivor
+	bl	.setup_doorbell_ivors
+	bl	.setup_ehv_ivors
 #else
 	bl	__setup_e500mc_ivors
 #endif
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
new file mode 100644
index 000000000000..4f9a93fcfe07
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -0,0 +1,91 @@
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = unused (clobbered), r4 = ptr to cputable entry
+ * Returns early, before touching LPID/LPCR/TLB, when MSR:HV is clear.
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+	mflr	r11	/* save LR: the bl's below overwrite it */
+	bl	__init_hvmode_206
+	mtlr	r11	/* restore LR so the conditional return below works */
+	beqlr		/* CR0 eq from __init_hvmode_206: HV not set, done */
+	li	r0,0
+	mtspr	SPRN_LPID,r0	/* LPID = 0 */
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11	/* LR was overwritten again by the two bl's above */
+	blr
+
+_GLOBAL(__restore_cpu_power7)
+	mflr	r11	/* save LR: the bl's below overwrite it */
+	mfmsr	r3
+	rldicl.	r0,r3,4,63	/* same MSR bit test as __init_hvmode_206 */
+	beqlr		/* bit clear: skip the LPID/LPCR/TLB setup */
+	li	r0,0
+	mtspr	SPRN_LPID,r0	/* LPID = 0 */
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11	/* restore the caller's return address */
+	blr
+
+__init_hvmode_206:
+	/* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */
+	mfmsr	r3
+	rldicl.	r0,r3,4,63	/* isolate the MSR bit into r0, setting CR0 */
+	bnelr		/* bit set: return at once, feature left enabled */
+	ld	r5,CPU_SPEC_FEATURES(r4)	/* r4 = cputable entry from caller */
+	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206)
+	xor	r5,r5,r6	/* toggles the bit: assumes it was set on entry */
+	std	r5,CPU_SPEC_FEATURES(r4)
+	blr		/* __setup_cpu_power7 tests CR0 (beqlr) right after this */
+
+__init_LPCR:
+	/* Setup a sane LPCR:
+	 *
+	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
+	 *   PECE = 0b111
+	 *   DPFD = 4
+	 *
+	 * Other bits untouched for now
+	 */
+	mfspr	r3,SPRN_LPCR	/* r3 and r5 are used as scratch here */
+	ori	r3,r3,(LPCR_LPES0|LPCR_LPES1)
+	xori	r3,r3, LPCR_LPES0	/* both LPES bits were just set: flip LPES0 off */
+	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+	li	r5,7	/* 3-bit mask, shifted to the DPFD field below */
+	sldi	r5,r5,LPCR_DPFD_SH
+	andc	r3,r3,r5	/* clear the old DPFD value */
+	li	r5,4
+	sldi	r5,r5,LPCR_DPFD_SH
+	or	r3,r3,r5	/* insert DPFD = 4 */
+	mtspr	SPRN_LPCR,r3
+	isync
+	blr
+
+__init_TLB:
+	/* Clear the TLB: 128 tlbiel ops, operand stepped by 0x1000 each */
+	li	r6,128
+	mtctr	r6	/* CTR = loop count for the bdnz below */
+	li	r7,0xc00	/* IS field = 0b11 */
+	ptesync
+2:	tlbiel	r7
+	addi	r7,r7,0x1000
+	bdnz	2b
+	ptesync
+1:	blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c9b68d07ac4f..34d2722b9451 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_pa6t(void);
 extern void __restore_cpu_ppc970(void);
 extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power7(void);
+extern void __restore_cpu_a2(void);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -199,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (gp)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -214,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4+ (gq)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -230,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -248,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -284,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -302,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -318,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5 (gr)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -338,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -354,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -371,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -385,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6 |
 			PPC_FEATURE_POWER6_EXT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -404,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER6 (architected)",
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -417,12 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (architected)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7 */
@@ -431,14 +434,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7+ */
@@ -447,14 +451,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7+ (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
 	{	/* Cell Broadband Engine */
@@ -465,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_user_features	= COMMON_USER_PPC64 |
 			PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
 			PPC_FEATURE_SMT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_CELL,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 4,
@@ -480,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "PA6T",
 		.cpu_features		= CPU_FTRS_PA6T,
 		.cpu_user_features	= COMMON_USER_PA6T,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PA6T,
 		.icache_bsize		= 64,
 		.dcache_bsize		= 64,
 		.num_pmcs		= 6,
@@ -497,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (compatible)",
 		.cpu_features		= CPU_FTRS_COMPATIBLE,
 		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -1973,7 +1978,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x80240000,
 		.cpu_name		= "e5500",
-		.cpu_features		= CPU_FTRS_E500MC,
+		.cpu_features		= CPU_FTRS_E5500,
 		.cpu_user_features	= COMMON_USER_BOOKE,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
 			MMU_FTR_USE_TLBILX,
@@ -2005,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_PPC32 */
 #endif /* CONFIG_E500 */
 
-#ifdef CONFIG_PPC_BOOK3E_64
+#ifdef CONFIG_PPC_A2
+	{	/* Standard A2 (>= DD2) + FPU core */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00480000,
+		.cpu_name		= "A2 (>= DD2)",
+		.cpu_features		= CPU_FTRS_A2,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTRS_A2,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 0,
+		.cpu_setup		= __setup_cpu_a2,
+		.cpu_restore		= __restore_cpu_a2,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppca2",
+	},
 	{	/* This is a default entry to get going, to be replaced by
 		 * a real one at some stage
 		 */
@@ -2026,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_generic,
 		.platform		= "power6",
 	},
-#endif
+#endif /* CONFIG_PPC_A2 */
 };
 
 static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 3d3d416339dd..4e6ee944495a 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs)
 		return;
 
 	hard_irq_disable();
-	if (!cpu_isset(cpu, cpus_in_crash))
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
 		crash_save_cpu(regs, cpu);
-	cpu_set(cpu, cpus_in_crash);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
 
 	/*
 	 * Entered via soft-reset - could be the kdump
@@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * Tell the kexec CPU that entered via soft-reset and ready
 	 * to go down.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr)) {
-		cpu_clear(cpu, cpus_in_sr);
+	if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
+		cpumask_clear_cpu(cpu, &cpus_in_sr);
 		atomic_inc(&enter_on_soft_reset);
 	}
 
@@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * This barrier is needed to make sure that all CPUs are stopped.
 	 * If not, soft-reset will be invoked to bring other CPUs.
 	 */
-	while (!cpu_isset(crashing_cpu, cpus_in_crash))
+	while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
 		cpu_relax();
 
 	if (ppc_md.kexec_cpu_down)
@@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu)
 {
 	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 
-	cpu_clear(cpu, cpus_in_sr);
+	cpumask_clear_cpu(cpu, &cpus_in_sr);
 	while (atomic_read(&enter_on_soft_reset) != ncpus)
 		cpu_relax();
 }
@@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 */
 	printk(KERN_EMERG "Sending IPI to other cpus...\n");
 	msecs = 10000;
-	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
 		cpu_relax();
 		mdelay(1);
 	}
@@ -144,54 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 * user to do soft reset such that we get all.
 	 * Soft-reset will be used until better mechanism is implemented.
 	 */
-	if (cpus_weight(cpus_in_crash) < ncpus) {
+	if (cpumask_weight(&cpus_in_crash) < ncpus) {
 		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
-			ncpus - cpus_weight(cpus_in_crash));
+			ncpus - cpumask_weight(&cpus_in_crash));
 		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
-		cpus_in_sr = CPU_MASK_NONE;
+		cpumask_clear(&cpus_in_sr);
 		atomic_set(&enter_on_soft_reset, 0);
-		while (cpus_weight(cpus_in_crash) < ncpus)
+		while (cpumask_weight(&cpus_in_crash) < ncpus)
 			cpu_relax();
 	}
 	/*
 	 * Make sure all CPUs are entered via soft-reset if the kdump is
 	 * invoked using soft-reset.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr))
+	if (cpumask_test_cpu(cpu, &cpus_in_sr))
 		crash_soft_reset_check(cpu);
 	/* Leave the IPI callback set */
 }
 
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP)
-static void crash_kexec_wait_realmode(int cpu)
-{
-	unsigned int msecs;
-	int i;
-
-	msecs = 10000;
-	for (i=0; i < NR_CPUS && msecs > 0; i++) {
-		if (i == cpu)
-			continue;
-
-		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
-			barrier();
-			if (!cpu_possible(i)) {
-				break;
-			}
-			if (!cpu_online(i)) {
-				break;
-			}
-			msecs--;
-			mdelay(1);
-		}
-	}
-	mb();
-}
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif
-
 /*
  * This function will be called by secondary cpus or by kexec cpu
  * if soft-reset is activated to stop some CPUs.
@@ -212,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 			 * exited using 'x'(exit and recover) or
 			 * kexec_should_crash() failed for all running tasks.
 			 */
-			cpu_clear(cpu, cpus_in_sr);
+			cpumask_clear_cpu(cpu, &cpus_in_sr);
 			local_irq_restore(flags);
 			return;
 		}
@@ -226,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 		 * then start kexec boot.
 		 */
 		crash_soft_reset_check(cpu);
-		cpu_set(crashing_cpu, cpus_in_crash);
+		cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 		if (ppc_md.kexec_cpu_down)
 			ppc_md.kexec_cpu_down(1, 0);
 		machine_kexec(kexec_crash_image);
@@ -235,7 +205,8 @@ void crash_kexec_secondary(struct pt_regs *regs)
 	crash_ipi_callback(regs);
 }
 
-#else
+#else	/* ! CONFIG_SMP */
+
 static void crash_kexec_prepare_cpus(int cpu)
 {
 	/*
@@ -253,9 +224,39 @@ static void crash_kexec_prepare_cpus(int cpu)
 
 void crash_kexec_secondary(struct pt_regs *regs)
 {
-	cpus_in_sr = CPU_MASK_NONE;
+	cpumask_clear(&cpus_in_sr);
 }
-#endif
+#endif	/* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs = 10000;	/* total budget, ~1ms per decrement */
+	int i;
+
+	for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
+		if (i == cpu)
+			continue;
+
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			/*
+			 * Give up on a CPU that is gone, or once the budget is
+			 * spent: msecs is unsigned, so decrementing it past
+			 * zero would wrap and make the wait unbounded.
+			 */
+			if (!cpu_possible(i) || !cpu_online(i) || msecs == 0)
+				break;
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
 
 /*
  * Register a function to be called on shutdown.  Only use this if you
@@ -345,7 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crashing_cpu = smp_processor_id();
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
-	cpu_set(crashing_cpu, cpus_in_crash);
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 	crash_kexec_wait_realmode(crashing_cpu);
 
 	machine_kexec_mask_interrupts();
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 3307a52d797f..2cc451aaaca7 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -13,84 +13,35 @@
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
-#include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 #include <asm/dbell.h>
 #include <asm/irq_regs.h>
 
 #ifdef CONFIG_SMP
-struct doorbell_cpu_info {
-	unsigned long	messages;	/* current messages bits */
-	unsigned int	tag;		/* tag value */
-};
-
-static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info);
-
 void doorbell_setup_this_cpu(void)
 {
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+	unsigned long tag = mfspr(SPRN_PIR) & 0x3fff;
 
-	info->messages = 0;
-	info->tag = mfspr(SPRN_PIR) & 0x3fff;
+	smp_muxed_ipi_set_data(smp_processor_id(), tag);
 }
 
-void doorbell_message_pass(int target, int msg)
+void doorbell_cause_ipi(int cpu, unsigned long data)
 {
-	struct doorbell_cpu_info *info;
-	int i;
-
-	if (target < NR_CPUS) {
-		info = &per_cpu(doorbell_cpu_info, target);
-		set_bit(msg, &info->messages);
-		ppc_msgsnd(PPC_DBELL, 0, info->tag);
-	}
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(i) {
-			if (i == smp_processor_id())
-				continue;
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-			ppc_msgsnd(PPC_DBELL, 0, info->tag);
-		}
-	}
-	else { /* target == MSG_ALL */
-		for_each_online_cpu(i) {
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-		}
-		ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);
-	}
+	ppc_msgsnd(PPC_DBELL, 0, data);
 }
 
 void doorbell_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-	int msg;
 
-	/* Warning: regs can be NULL when called from irq enable */
+	irq_enter();
 
-	if (!info->messages || (num_online_cpus() < 2))
-		goto out;
+	smp_ipi_demux();
 
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &info->messages))
-			smp_message_recv(msg);
-
-out:
+	irq_exit();
 	set_irq_regs(old_regs);
 }
-
-void doorbell_check_self(void)
-{
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-
-	if (!info->messages)
-		return;
-
-	ppc_msgsnd(PPC_DBELL, 0, info->tag);
-}
-
 #else /* CONFIG_SMP */
 void doorbell_exception(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d82878c4daa6..d834425186ae 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -421,6 +421,12 @@ BEGIN_FTR_SECTION
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	mfspr	r25,SPRN_DSCR
+	std	r25,THREAD_DSCR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
@@ -462,10 +468,10 @@ BEGIN_FTR_SECTION
   FTR_SECTION_ELSE_NESTED(95)
 	clrrdi	r6,r8,40	/* get its 1T ESID */
 	clrrdi	r9,r1,40	/* get current sp 1T ESID */
-  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95)
+  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
 FTR_SECTION_ELSE
 	b	2f
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
 	clrldi.	r0,r6,2		/* is new ESID c00000000? */
 	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
 	cror	eq,4*cr1+eq,eq
@@ -479,7 +485,7 @@ BEGIN_FTR_SECTION
 	li	r9,MMU_SEGSIZE_1T	/* insert B field */
 	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
 	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 
 	/* Update the last bolted SLB.  No write barriers are needed
 	 * here, provided we only update the current CPU's SLB shadow
@@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
 	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
 	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
 
-	/* No need to check for CPU_FTR_NO_SLBIE_B here, since when
+	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
 	 * we have 1TB segments, the only CPUs known to have the errata
 	 * only support less than 1TB of system memory and we'll never
 	 * actually hit this code path.
@@ -522,6 +528,15 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	ld	r0,THREAD_DSCR(r4)
+	cmpd	r0,r25
+	beq	1f
+	mtspr	SPRN_DSCR,r0
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 
 	/* r3-r13 are destroyed -- Cort */
 	REST_8GPRS(14, r1)
@@ -838,7 +853,7 @@ _GLOBAL(enter_rtas)
 
 _STATIC(rtas_return_loc)
 	/* relocation is off at this point */
-	mfspr	r4,SPRN_SPRG_PACA	/* Get PACA */
+	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
 
 	bcl	20,31,$+4
@@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs)
 	REST_8GPRS(14, r1)		/* Restore the non-volatiles */
 	REST_10GPRS(22, r1)		/* ditto */
 
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 
 	ld	r4,_CCR(r1)
 	mtcr	r4
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 9651acc3504a..d24d4400cc79 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -17,6 +17,7 @@
 #include <asm/cputable.h>
 #include <asm/setup.h>
 #include <asm/thread_info.h>
+#include <asm/reg_a2.h>
 #include <asm/exception-64e.h>
 #include <asm/bug.h>
 #include <asm/irqflags.h>
@@ -252,9 +253,6 @@ exception_marker:
 	.balign	0x1000
 	.globl interrupt_base_book3e
 interrupt_base_book3e:					/* fake trap */
-	/* Note: If real debug exceptions are supported by the HW, the vector
-	 * below will have to be patched up to point to an appropriate handler
-	 */
 	EXCEPTION_STUB(0x000, machine_check)		/* 0x0200 */
 	EXCEPTION_STUB(0x020, critical_input)		/* 0x0580 */
 	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */
@@ -271,8 +269,13 @@ interrupt_base_book3e:					/* fake trap */
 	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */
 	EXCEPTION_STUB(0x1c0, data_tlb_miss)
 	EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+	EXCEPTION_STUB(0x260, perfmon)
 	EXCEPTION_STUB(0x280, doorbell)
 	EXCEPTION_STUB(0x2a0, doorbell_crit)
+	EXCEPTION_STUB(0x2c0, guest_doorbell)
+	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+	EXCEPTION_STUB(0x300, hypercall)
+	EXCEPTION_STUB(0x320, ehpriv)
 
 	.globl interrupt_end_book3e
 interrupt_end_book3e:
@@ -454,6 +457,70 @@ interrupt_end_book3e:
 kernel_dbg_exc:
 	b	.	/* NYI */
 
+/* Debug exception as a debug interrupt */
+	START_EXCEPTION(debug_debug);
+	DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the DSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,DBSR_IC@h
+	beq+	1f
+
+	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+	LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,DBSR_IC@h		/* clear the IC event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the DSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_DSRR1,r11
+	lwz	r10,PACA_EXDBG+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXDBG+EX_R1(r13)
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXDBG+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXDBG+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_DBG_SCRATCH
+	rfdi
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r15,SPRN_SPRG_DBG_SCRATCH
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
+	mfspr	r14,SPRN_DBSR
+	EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL)
+	std	r14,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r14
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	bl	.save_nvgprs
+	bl	.DebugException
+	b	.ret_from_except
+
+	MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE)
+
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
 
@@ -468,6 +535,11 @@ kernel_dbg_exc:
 //	b	ret_from_crit_except
 	b	.
 
+	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
+
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -587,7 +659,12 @@ fast_exception_return:
 BAD_STACK_TRAMPOLINE(0x000)
 BAD_STACK_TRAMPOLINE(0x100)
 BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
 BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
 BAD_STACK_TRAMPOLINE(0x400)
 BAD_STACK_TRAMPOLINE(0x500)
 BAD_STACK_TRAMPOLINE(0x600)
@@ -864,8 +941,23 @@ have_hes:
 	 * that will have to be made dependent on whether we are running under
 	 * a hypervisor I suppose.
 	 */
-	ori	r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS
-	mtspr	SPRN_MAS0,r3
+
+	/* BEWARE, MAGIC
+	 * This code is called as an ordinary function on the boot CPU. But to
+	 * avoid duplication, this code is also used in SCOM bringup of
+	 * secondary CPUs. We read the code between the a2_tlbinit_code_start
+	 * and a2_tlbinit_code_end labels one instruction at a time and RAM it
+	 * into the new core via SCOM. That doesn't process branches, so there
+	 * must be none between those two labels. It also means if this code
+	 * ever takes any parameters, the SCOM code must also be updated to
+	 * provide them.
+	 */
+	.globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+	ori	r11,r3,MAS0_WQ_ALLWAYS
+	oris	r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+	mtspr	SPRN_MAS0,r11
 	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
 	ori	r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
 	mtspr	SPRN_MAS1,r3
@@ -879,18 +971,86 @@ have_hes:
 	/* Write the TLB entry */
 	tlbwe
 
+	.globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
 	/* Now we branch the new virtual address mapped by this entry */
 	LOAD_REG_IMMEDIATE(r3,1f)
 	mtctr	r3
 	bctr
 
 1:	/* We are now running at PAGE_OFFSET, clean the TLB of everything
-	 * else (XXX we should scan for bolted crap from the firmware too)
+	 * else (including IPROTed things left by firmware)
+	 * r4 = TLBnCFG
+	 * r3 = current address (more or less)
 	 */
+
+	li	r5,0
+	mtspr	SPRN_MAS6,r5
+	tlbsx	0,r3
+
+	rlwinm	r9,r4,0,TLBnCFG_N_ENTRY
+	rlwinm	r10,r4,8,0xff
+	addi	r10,r10,-1	/* Get inner loop mask */
+
+	li	r3,1
+
+	mfspr	r5,SPRN_MAS1
+	rlwinm	r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+	mfspr	r6,SPRN_MAS2
+	rldicr	r6,r6,0,51		/* Extract EPN */
+
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r7,r7,0,0xffff0fff	/* Clear HES and WQ */
+
+	rlwinm	r8,r7,16,0xfff		/* Extract ESEL */
+
+2:	add	r4,r3,r8
+	and	r4,r4,r10
+
+	rlwimi	r7,r4,16,MAS0_ESEL_MASK
+
+	mtspr	SPRN_MAS0,r7
+	mtspr	SPRN_MAS1,r5
+	mtspr	SPRN_MAS2,r6
+	tlbwe
+
+	addi	r3,r3,1
+	and.	r4,r3,r10
+
+	bne	3f
+	addis	r6,r6,(1<<30)@h
+3:
+	cmpw	r3,r9
+	blt	2b
+
+	.globl  a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+	/* Now establish early debug mappings if applicable */
+	/* Restore the MAS0 we used for linear mapping load */
+	mtspr	SPRN_MAS0,r11
+
+	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
+	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+	mtspr	SPRN_MAS1,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+	mtspr	SPRN_MAS2,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+	mtspr	SPRN_MAS7_MAS3,r3
+	/* re-use the MAS8 value from the linear mapping */
+	tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
 	PPC_TLBILX(0,0,0)
 	sync
 	isync
 
+	.globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
 	/* We translate LR and return */
 	mflr	r3
 	tovirt(r3,r3)
@@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors)
 	sync
 
 	blr
+
+_GLOBAL(setup_perfmon_ivor)
+	SET_IVOR(35, 0x260) /* Performance Monitor */
+	blr
+
+_GLOBAL(setup_doorbell_ivors)
+	SET_IVOR(36, 0x280) /* Processor Doorbell */
+	SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+
+	/* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+	blr
+
+_GLOBAL(setup_ehv_ivors)
+	/*
+	 * We may be running as a guest and lack E.HV even on a chip
+	 * that normally has it.
+	 */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+	SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+	blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index aeb739e18769..a85f4874cba7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -37,23 +37,51 @@
 	.globl __start_interrupts
 __start_interrupts:
 
-	STD_EXCEPTION_PSERIES(0x100, system_reset)
+	.globl system_reset_pSeries;
+system_reset_pSeries:
+	HMT_MEDIUM;
+	DO_KVM	0x100;
+	SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+	/* Running native on arch 2.06 or later, check if we are
+	 * waking up from nap. We only handle no state loss and
+	 * supervisor state loss. We do -not- handle hypervisor
+	 * state loss at this time.
+	 */
+	mfspr	r13,SPRN_SRR1
+	rlwinm	r13,r13,47-31,30,31
+	cmpwi	cr0,r13,1
+	bne	1f
+	b	.power7_wakeup_noloss
+1:	cmpwi	cr0,r13,2
+	bne	1f
+	b	.power7_wakeup_loss
+	/* Total loss of HV state is fatal, we could try to use the
+	 * PIR to locate a PACA, then use an emergency stack etc...
+	 * but for now, let's just stay stuck here
+	 */
+1:	cmpwi	cr0,r13,3
+	beq	.
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206)
+#endif /* CONFIG_PPC_P7_NAP */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	. = 0x200
 _machine_check_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x200
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 	. = 0x300
 	.globl data_access_pSeries
 data_access_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x300
-	mtspr	SPRN_SPRG_SCRATCH0,r13
+	SET_SCRATCH0(r13)
 BEGIN_FTR_SECTION
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	std	r9,PACA_EXSLB+EX_R9(r13)
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	mfspr	r10,SPRN_DAR
@@ -67,22 +95,22 @@ BEGIN_FTR_SECTION
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r12,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r12)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(data_access_common)
+	EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD)
 FTR_SECTION_ELSE
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 
 	. = 0x380
 	.globl data_access_slb_pSeries
 data_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x380
-	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_DAR
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -95,7 +123,7 @@ data_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -113,15 +141,15 @@ data_access_slb_pSeries:
 	bctr
 #endif
 
-	STD_EXCEPTION_PSERIES(0x400, instruction_access)
+	STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
 
 	. = 0x480
 	.globl instruction_access_slb_pSeries
 instruction_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x480
-	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -134,7 +162,7 @@ instruction_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -147,13 +175,29 @@ instruction_access_slb_pSeries:
 	bctr
 #endif
 
-	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
-	STD_EXCEPTION_PSERIES(0x600, alignment)
-	STD_EXCEPTION_PSERIES(0x700, program_check)
-	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
-	MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
-	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
-	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
+	/* We open code these as we can't have a ". = x" (even with
+	 * x = ".") within a feature section
+	 */
+	. = 0x500;
+	.globl hardware_interrupt_pSeries;
+	.globl hardware_interrupt_hv;
+hardware_interrupt_pSeries:
+hardware_interrupt_hv:
+	BEGIN_FTR_SECTION
+		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
+	FTR_SECTION_ELSE
+		_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
+	ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206)
+
+	STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+	STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+	STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+
+	MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
+	MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer)
+
+	STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
+	STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
 
 	. = 0xc00
 	.globl	system_call_pSeries
@@ -165,13 +209,13 @@ BEGIN_FTR_SECTION
 	beq-	1f
 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	mr	r9,r13
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	mfspr	r11,SPRN_SRR0
-	ld	r12,PACAKBASE(r13)
-	ld	r10,PACAKMSR(r13)
-	LOAD_HANDLER(r12, system_call_entry)
-	mtspr	SPRN_SRR0,r12
 	mfspr	r12,SPRN_SRR1
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10, system_call_entry)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
 	rfid
 	b	.	/* prevent speculative execution */
@@ -183,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	rfid		/* return to userspace */
 	b	.
 
-	STD_EXCEPTION_PSERIES(0xd00, single_step)
-	STD_EXCEPTION_PSERIES(0xe00, trap_0e)
+	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+
+	/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+	 * out of line to handle them
+	 */
+	. = 0xe00
+	b	h_data_storage_hv
+	. = 0xe20
+	b	h_instr_storage_hv
+	. = 0xe40
+	b	emulation_assist_hv
+	. = 0xe50
+	b	hmi_exception_hv
+	. = 0xe60
+	b	hmi_exception_hv
 
 	/* We need to deal with the Altivec unavailable exception
 	 * here which is at 0xf20, thus in the middle of the
@@ -193,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	 */
 performance_monitor_pSeries_1:
 	. = 0xf00
-	DO_KVM	0xf00
 	b	performance_monitor_pSeries
 
 altivec_unavailable_pSeries_1:
 	. = 0xf20
-	DO_KVM	0xf20
 	b	altivec_unavailable_pSeries
 
 vsx_unavailable_pSeries_1:
 	. = 0xf40
-	DO_KVM	0xf40
 	b	vsx_unavailable_pSeries
 
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
+	STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+	STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
+	STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+	STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
+	STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
 #endif /* CONFIG_CBE_RAS */
 
 	. = 0x3000
 
-/*** pSeries interrupt support ***/
+/*** Out of line interrupts support ***/
+
+	/* moved from 0xe00 */
+	STD_EXCEPTION_HV(., 0xe00, h_data_storage)
+	STD_EXCEPTION_HV(., 0xe20, h_instr_storage)
+	STD_EXCEPTION_HV(., 0xe40, emulation_assist)
+	STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */
 
 	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(., performance_monitor)
-	STD_EXCEPTION_PSERIES(., altivec_unavailable)
-	STD_EXCEPTION_PSERIES(., vsx_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+	STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -240,17 +300,30 @@ masked_interrupt:
 	rotldi	r10,r10,16
 	mtspr	SPRN_SRR1,r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	mfspr	r13,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 	rfid
 	b	.
 
+masked_Hinterrupt:
+	stb	r10,PACAHARDIRQEN(r13)
+	mtcrf	0x80,r9
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+	mfspr	r10,SPRN_HSRR1
+	rldicl	r10,r10,48,1		/* clear MSR_EE */
+	rotldi	r10,r10,16
+	mtspr	SPRN_HSRR1,r10
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	GET_SCRATCH0(r13)
+	hrfid
+	b	.
+
 	.align	7
 do_stab_bolted_pSeries:
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted)
+	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -260,15 +333,15 @@ do_stab_bolted_pSeries:
       .align 7
 system_reset_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	.globl machine_check_fwnmi
       .align 7
 machine_check_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -282,7 +355,7 @@ slb_miss_user_pseries:
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r10,SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	ld	r12,PACA_EXSLB+EX_R3(r13)
 	std	r10,PACA_EXGEN+EX_R13(r13)
@@ -342,6 +415,8 @@ machine_check_common:
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+        STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+        STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
 	STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
 	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 #ifdef CONFIG_ALTIVEC
@@ -386,9 +461,24 @@ bad_stack:
 	std	r12,_XER(r1)
 	SAVE_GPR(0,r1)
 	SAVE_GPR(2,r1)
-	SAVE_4GPRS(3,r1)
-	SAVE_2GPRS(7,r1)
-	SAVE_10GPRS(12,r1)
+	ld	r10,EX_R3(r3)
+	std	r10,GPR3(r1)
+	SAVE_GPR(4,r1)
+	SAVE_4GPRS(5,r1)
+	ld	r9,EX_R9(r3)
+	ld	r10,EX_R10(r3)
+	SAVE_2GPRS(9,r1)
+	ld	r9,EX_R11(r3)
+	ld	r10,EX_R12(r3)
+	ld	r11,EX_R13(r3)
+	std	r9,GPR11(r1)
+	std	r10,GPR12(r1)
+	std	r11,GPR13(r1)
+BEGIN_FTR_SECTION
+	ld	r10,EX_CFAR(r3)
+	std	r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	SAVE_8GPRS(14,r1)
 	SAVE_10GPRS(22,r1)
 	lhz	r12,PACA_TRAP_SAVE(r13)
 	std	r12,_TRAP(r1)
@@ -397,6 +487,9 @@ bad_stack:
 	li	r12,0
 	std	r12,0(r11)
 	ld	r2,PACATOC(r13)
+	ld	r11,exception_marker@toc(r2)
+	std	r12,RESULT(r1)
+	std	r11,STACK_FRAME_OVERHEAD-16(r1)
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.kernel_bad_stack
 	b	1b
@@ -419,6 +512,19 @@ data_access_common:
 	li	r5,0x300
 	b	.do_hash_page	 	/* Try to handle as hpte fault */
 
+	.align  7
+        .globl  h_data_storage_common
+h_data_storage_common:
+        mfspr   r10,SPRN_HDAR
+        std     r10,PACA_EXGEN+EX_DAR(r13)
+        mfspr   r10,SPRN_HDSISR
+        stw     r10,PACA_EXGEN+EX_DSISR(r13)
+        EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+        bl      .save_nvgprs
+        addi    r3,r1,STACK_FRAME_OVERHEAD
+        bl      .unknown_exception
+        b       .ret_from_except
+
 	.align	7
 	.globl instruction_access_common
 instruction_access_common:
@@ -428,6 +534,8 @@ instruction_access_common:
 	li	r5,0x400
 	b	.do_hash_page		/* Try to handle as hpte fault */
 
+        STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+
 /*
  * Here is the common SLB miss user that is used when going to virtual
  * mode for SLB misses, that is currently not used
@@ -750,7 +858,7 @@ _STATIC(do_hash_page)
 BEGIN_FTR_SECTION
 	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
 	bne-	do_ste_alloc		/* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 
 	clrrdi	r11,r1,THREAD_SHIFT
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index c5c24beb8387..ba250d505e07 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -805,19 +805,6 @@ _ENTRY(copy_and_flush)
 	blr
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_GEMINI
-	.globl	__secondary_start_gemini
-__secondary_start_gemini:
-        mfspr   r4,SPRN_HID0
-        ori     r4,r4,HID0_ICFI
-        li      r3,0
-        ori     r3,r3,HID0_ICE
-        andc    r4,r4,r3
-        mtspr   SPRN_HID0,r4
-        sync
-        b       __secondary_start
-#endif /* CONFIG_GEMINI */
-
 	.globl __secondary_start_mpc86xx
 __secondary_start_mpc86xx:
 	mfspr	r3, SPRN_PIR
@@ -890,15 +877,6 @@ __secondary_start:
 	mtspr	SPRN_SRR1,r4
 	SYNC
 	RFI
-
-_GLOBAL(start_secondary_resume)
-	/* Reset stack */
-	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-	li	r3,0
-	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	start_secondary
-	b	.
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3a319f9c9d3e..ba504099844a 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -147,6 +147,8 @@ __secondary_hold:
 	mtctr	r4
 	mr	r3,r24
 	li	r4,0
+	/* Make sure that patched code is visible */
+	isync
 	bctr
 #else
 	BUG_OPCODE
@@ -216,19 +218,25 @@ generic_secondary_common_init:
 	 */
 	LOAD_REG_ADDR(r13, paca)	/* Load paca pointer		 */
 	ld	r13,0(r13)		/* Get base vaddr of paca array	 */
+#ifndef CONFIG_SMP
+	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */
+	b	.kexec_wait		/* wait for next kernel if !SMP	 */
+#else
+	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
+	lwz	r7,0(r7)		/* also the max paca allocated 	 */
 	li	r5,0			/* logical cpu id                */
 1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
 	cmpw	r6,r24			/* Compare to our id             */
 	beq	2f
 	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
 	addi	r5,r5,1
-	cmpwi	r5,NR_CPUS
+	cmpw	r5,r7			/* Check if more pacas exist     */
 	blt	1b
 
 	mr	r3,r24			/* not found, copy phys to r3	 */
 	b	.kexec_wait		/* next kernel might do better	 */
 
-2:	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG */
+2:	SET_PACA(r13)
 #ifdef CONFIG_PPC_BOOK3E
 	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
 	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
@@ -236,34 +244,39 @@ generic_secondary_common_init:
 
 	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
-3:	HMT_LOW
-	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
-					/* start.			 */
-
-#ifndef CONFIG_SMP
-	b	3b			/* Never go on non-SMP		 */
-#else
-	cmpwi	0,r23,0
-	beq	3b			/* Loop until told to go	 */
-
-	sync				/* order paca.run and cur_cpu_spec */
 
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
 	ld	r23,0(r23)
 	ld	r23,CPU_SPEC_RESTORE(r23)
 	cmpdi	0,r23,0
-	beq	4f
+	beq	3f
 	ld	r23,0(r23)
 	mtctr	r23
 	bctrl
 
-4:	/* Create a temp kernel stack for use before relocation is on.	*/
+3:	LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */
+	lwarx	r4,0,r3
+	subi	r4,r4,1
+	stwcx.	r4,0,r3
+	bne	3b
+	isync
+
+4:	HMT_LOW
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
+					/* start.			 */
+	cmpwi	0,r23,0
+	beq	4b			/* Loop until told to go	 */
+
+	sync				/* order paca.run and cur_cpu_spec */
+	isync				/* In case code patching happened */
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
 	b	__secondary_start
-#endif
+#endif /* SMP */
 
 /*
  * Turn the MMU off.
@@ -534,7 +547,7 @@ _GLOBAL(pmac_secondary_start)
 	ld	r4,0(r4)		/* Get base vaddr of paca array	*/
 	mulli	r13,r24,PACA_SIZE	/* Calculate vaddr of right paca */
 	add	r13,r13,r4		/* for this processor.		*/
-	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG*/
+	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
@@ -645,7 +658,7 @@ _GLOBAL(enable_64b_mode)
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
 	mtmsr	r11
 #else /* CONFIG_PPC_BOOK3E */
-	li	r12,(MSR_SF | MSR_ISF)@highest
+	li	r12,(MSR_64BIT | MSR_ISF)@highest
 	sldi	r12,r12,48
 	or	r11,r11,r12
 	mtmsrd	r11
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index c00d4ca1ee15..28581f1ad2c0 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -527,7 +527,7 @@ static int ibmebus_bus_pm_resume_noirq(struct device *dev)
 
 #endif /* !CONFIG_SUSPEND */
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 
 static int ibmebus_bus_pm_freeze(struct device *dev)
 {
@@ -665,7 +665,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev)
 	return ret;
 }
 
-#else /* !CONFIG_HIBERNATION */
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #define ibmebus_bus_pm_freeze		NULL
 #define ibmebus_bus_pm_thaw		NULL
@@ -676,7 +676,7 @@ static int ibmebus_bus_pm_restore_noirq(struct device *dev)
 #define ibmebus_bus_pm_poweroff_noirq	NULL
 #define ibmebus_bus_pm_restore_noirq	NULL
 
-#endif /* !CONFIG_HIBERNATION */
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
 
 static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
 	.prepare = ibmebus_bus_pm_prepare,
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 000000000000..f8f0bc7f1d4f
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,97 @@
+/*
+ *  This file contains the idle (nap) entry and wakeup code for POWER7 CPUs.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+
+#undef DEBUG
+
+	.text
+
+_GLOBAL(power7_idle)
+	/* Now check if user or arch enabled NAP mode */
+	LOAD_REG_ADDRBASE(r3,powersave_nap)
+	lwz	r4,ADDROFF(powersave_nap)(r3)
+	cmpwi	0,r4,0
+	beqlr
+
+	/* NAP is a state loss, we create a regs frame on the
+	 * stack, fill it up with the state we care about and
+	 * stick a pointer to it in PACAR1. We really only
+	 * need to save PC, some CR bits and the NV GPRs,
+	 * but for now an interrupt frame will do.
+	 */
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-INT_FRAME_SIZE(r1)
+	std	r0,_LINK(r1)
+	std	r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+	/* Make sure FPU, VSX etc... are flushed as we may lose
+	 * state when going to nap mode
+	 */
+	bl	.discard_lazy_cpu_state
+#endif /* !CONFIG_SMP */
+
+	/* Hard disable interrupts */
+	mfmsr	r9
+	rldicl	r9,r9,48,1
+	rotldi	r9,r9,16
+	mtmsrd	r9,1			/* hard-disable interrupts */
+	li	r0,0
+	stb	r0,PACASOFTIRQEN(r13)	/* we'll hard-enable shortly */
+	stb	r0,PACAHARDIRQEN(r13)
+
+	/* Continue saving state */
+	SAVE_GPR(2, r1)
+	SAVE_NVGPRS(r1)
+	mfcr	r3
+	std	r3,_CCR(r1)
+	std	r9,_MSR(r1)
+	std	r1,PACAR1(r13)
+
+	/* Magic NAP mode enter sequence */
+	std	r0,0(r1)
+	ptesync
+	ld	r0,0(r1)
+1:	cmp	cr0,r0,r0
+	bne	1b
+	PPC_NAP
+	b	.
+
+_GLOBAL(power7_wakeup_loss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	REST_NVGPRS(r1)
+	REST_GPR(2, r1)
+	ld	r3,_CCR(r1)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtcr	r3
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
+
+_GLOBAL(power7_wakeup_noloss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 5c1118e31940..ffafaea3d261 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -17,8 +17,7 @@
 #include <asm/machdep.h>
 #include <asm/pgtable.h>
 #include <asm/ppc-pci.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
 
 #define IOWA_MAX_BUS	8
 
@@ -145,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
 	return res;
 }
 
-/* Regist new bus to support workaround */
+/* Enable IO workaround */
+static void __devinit io_workaround_init(void)
+{
+	static int io_workaround_inited;
+
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
 void __devinit iowa_register_bus(struct pci_controller *phb,
 			struct ppc_pci_io *ops,
 			int (*initfunc)(struct iowa_bus *, void *), void *data)
@@ -153,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	struct iowa_bus *bus;
 	struct device_node *np = phb->dn;
 
+	io_workaround_init();
+
 	if (iowa_bus_count >= IOWA_MAX_BUS) {
 		pr_err("IOWA:Too many pci bridges, "
 		       "workarounds disabled for %s\n", np->full_name);
@@ -162,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	bus = &iowa_busses[iowa_bus_count];
 	bus->phb = phb;
 	bus->ops = ops;
+	bus->private = data;
 
 	if (initfunc)
 		if ((*initfunc)(bus, data))
@@ -172,14 +186,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
 }
 
-/* enable IO workaround */
-void __devinit io_workaround_init(void)
-{
-	static int io_workaround_inited;
-
-	if (io_workaround_inited)
-		return;
-	ppc_pci_io = iowa_pci_io;
-	ppc_md.ioremap = iowa_ioremap;
-	io_workaround_inited = 1;
-}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f621b7d2d869..a24d37d4cf51 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -66,7 +66,6 @@
 #include <asm/ptrace.h>
 #include <asm/machdep.h>
 #include <asm/udbg.h>
-#include <asm/dbell.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_PPC64
@@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en)
 
 #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP)
 	/* Check for pending doorbell interrupts and resend to ourself */
-	doorbell_check_self();
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		smp_muxed_ipi_resend();
 #endif
 
 	/*
@@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
 void exc_lvl_ctx_init(void)
 {
 	struct thread_info *tp;
-	int i, hw_cpu;
+	int i, cpu_nr;
 
 	for_each_possible_cpu(i) {
-		hw_cpu = get_hard_smp_processor_id(i);
-		memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = critirq_ctx[hw_cpu];
-		tp->cpu = i;
+#ifdef CONFIG_PPC64
+		cpu_nr = i;
+#else
+		cpu_nr = get_hard_smp_processor_id(i);
+#endif
+		memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = critirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
 #ifdef CONFIG_BOOKE
-		memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = dbgirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = dbgirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
-		memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = mcheckirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = mcheckirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = HARDIRQ_OFFSET;
 #endif
 	}
@@ -477,20 +481,41 @@ void do_softirq(void)
  * IRQ controller and virtual interrupts
  */
 
+/* The main irq map itself is an array of NR_IRQS entries containing the
+ * associated host and hw irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+
 static LIST_HEAD(irq_hosts);
 static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static unsigned int revmap_trees_allocated;
 static DEFINE_MUTEX(revmap_trees_mutex);
-struct irq_map_entry irq_map[NR_IRQS];
+static struct irq_map_entry irq_map[NR_IRQS];
 static unsigned int irq_virq_count = NR_IRQS;
 static struct irq_host *irq_default_host;
 
+irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
+{
+	return irq_map[d->irq].hwirq;
+}
+EXPORT_SYMBOL_GPL(irqd_to_hwirq);
+
 irq_hw_number_t virq_to_hw(unsigned int virq)
 {
 	return irq_map[virq].hwirq;
 }
 EXPORT_SYMBOL_GPL(virq_to_hw);
 
+bool virq_is_host(unsigned int virq, struct irq_host *host)
+{
+	return irq_map[virq].host == host;
+}
+EXPORT_SYMBOL_GPL(virq_is_host);
+
 static int default_irq_host_match(struct irq_host *h, struct device_node *np)
 {
 	return h->of_node != NULL && h->of_node == np;
@@ -511,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 	/* Allocate structure and revmap table if using linear mapping */
 	if (revmap_type == IRQ_HOST_MAP_LINEAR)
 		size += revmap_arg * sizeof(unsigned int);
-	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
+	host = kzalloc(size, GFP_KERNEL);
 	if (host == NULL)
 		return NULL;
 
@@ -561,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 			irq_map[i].host = host;
 			smp_wmb();
 
-			/* Clear norequest flags */
-			irq_clear_status_flags(i, IRQ_NOREQUEST);
-
 			/* Legacy flags are left to default at this point,
 			 * one can then use irq_create_mapping() to
 			 * explicitly change them
 			 */
 			ops->map(host, i, i);
+
+			/* Clear norequest flags */
+			irq_clear_status_flags(i, IRQ_NOREQUEST);
 		}
 		break;
 	case IRQ_HOST_MAP_LINEAR:
@@ -579,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 		smp_wmb();
 		host->revmap_data.linear.revmap = rmap;
 		break;
+	case IRQ_HOST_MAP_TREE:
+		INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
+		break;
 	default:
 		break;
 	}
@@ -636,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto error;
 	}
 
-	irq_clear_status_flags(virq, IRQ_NOREQUEST);
-
 	/* map it */
 	smp_wmb();
 	irq_map[virq].hwirq = hwirq;
@@ -648,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto errdesc;
 	}
 
+	irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
 	return 0;
 
 errdesc:
@@ -704,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host,
 	 */
 	virq = irq_find_mapping(host, hwirq);
 	if (virq != NO_IRQ) {
-		if (host->ops->remap)
-			host->ops->remap(host, virq, hwirq);
 		pr_debug("irq: -> existing mapping on virq %d\n", virq);
 		return virq;
 	}
@@ -786,14 +812,15 @@ void irq_dispose_mapping(unsigned int virq)
 		return;
 
 	host = irq_map[virq].host;
-	WARN_ON (host == NULL);
-	if (host == NULL)
+	if (WARN_ON(host == NULL))
 		return;
 
 	/* Never unmap legacy interrupts */
 	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
 		return;
 
+	irq_set_status_flags(virq, IRQ_NOREQUEST);
+
 	/* remove chip and handler */
 	irq_set_chip_and_handler(virq, NULL, NULL);
 
@@ -813,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq)
 			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
 		break;
 	case IRQ_HOST_MAP_TREE:
-		/*
-		 * Check if radix tree allocated yet, if not then nothing to
-		 * remove.
-		 */
-		smp_rmb();
-		if (revmap_trees_allocated < 1)
-			break;
 		mutex_lock(&revmap_trees_mutex);
 		radix_tree_delete(&host->revmap_data.tree, hwirq);
 		mutex_unlock(&revmap_trees_mutex);
@@ -830,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq)
 	smp_mb();
 	irq_map[virq].hwirq = host->inval_irq;
 
-	irq_set_status_flags(virq, IRQ_NOREQUEST);
-
 	irq_free_descs(virq, 1);
 	/* Free it */
 	irq_free_virt(virq, 1);
@@ -877,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 	struct irq_map_entry *ptr;
 	unsigned int virq;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists and has bee initialized.
-	 * If not, we fallback to slow mode
-	 */
-	if (revmap_trees_allocated < 2)
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
 		return irq_find_mapping(host, hwirq);
 
-	/* Now try to resolve */
 	/*
 	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
 	 * as it's referencing an entry in the static irq_map table.
@@ -909,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
-
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists yet.
-	 * If not, then the irq will be inserted into the tree when it gets
-	 * initialized.
-	 */
-	smp_rmb();
-	if (revmap_trees_allocated < 1)
+	if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
 		return;
 
 	if (virq != NO_IRQ) {
@@ -934,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host,
 {
 	unsigned int *revmap;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
+		return irq_find_mapping(host, hwirq);
 
 	/* Check revmap bounds */
 	if (unlikely(hwirq >= host->revmap_data.linear.size))
@@ -1028,53 +1031,6 @@ int arch_early_irq_init(void)
 	return 0;
 }
 
-/* We need to create the radix trees late */
-static int irq_late_init(void)
-{
-	struct irq_host *h;
-	unsigned int i;
-
-	/*
-	 * No mutual exclusion with respect to accessors of the tree is needed
-	 * here as the synchronization is done via the state variable
-	 * revmap_trees_allocated.
-	 */
-	list_for_each_entry(h, &irq_hosts, link) {
-		if (h->revmap_type == IRQ_HOST_MAP_TREE)
-			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
-	}
-
-	/*
-	 * Make sure the radix trees inits are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 1;
-
-	/*
-	 * Insert the reverse mapping for those interrupts already present
-	 * in irq_map[].
-	 */
-	mutex_lock(&revmap_trees_mutex);
-	for (i = 0; i < irq_virq_count; i++) {
-		if (irq_map[i].host &&
-		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
-			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
-					  irq_map[i].hwirq, &irq_map[i]);
-	}
-	mutex_unlock(&revmap_trees_mutex);
-
-	/*
-	 * Make sure the radix trees insertions are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 2;
-
-	return 0;
-}
-arch_initcall(irq_late_init);
-
 #ifdef CONFIG_VIRQ_DEBUG
 static int virq_debug_show(struct seq_file *m, void *private)
 {
@@ -1082,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	struct irq_desc *desc;
 	const char *p;
 	static const char none[] = "none";
+	void *data;
 	int i;
 
-	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
-		      "chip name", "host name");
+	seq_printf(m, "%-5s  %-7s  %-15s  %-18s  %s\n", "virq", "hwirq",
+		      "chip name", "chip data", "host name");
 
 	for (i = 1; i < nr_irqs; i++) {
 		desc = irq_to_desc(i);
@@ -1098,7 +1055,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 			struct irq_chip *chip;
 
 			seq_printf(m, "%5d  ", i);
-			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
+			seq_printf(m, "0x%05lx  ", irq_map[i].hwirq);
 
 			chip = irq_desc_get_chip(desc);
 			if (chip && chip->name)
@@ -1107,6 +1064,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
 				p = none;
 			seq_printf(m, "%-15s  ", p);
 
+			data = irq_desc_get_chip_data(desc);
+			seq_printf(m, "0x%16p  ", data);
+
 			if (irq_map[i].host && irq_map[i].host->of_node)
 				p = irq_map[i].host->of_node->full_name;
 			else
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 42850ee00ada..bd9d35f59cf4 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 #ifdef CONFIG_SMP
 void kgdb_roundup_cpus(unsigned long flags)
 {
-	smp_send_debugger_break(MSG_ALL_BUT_SELF);
+	smp_send_debugger_break();
 }
 #endif
 
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index c834757bebc0..2b97b80d6d7d 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void)
 		if (!parent)
 			continue;
 		if (of_match_node(legacy_serial_parents, parent) != NULL) {
-			index = add_legacy_soc_port(np, np);
-			if (index >= 0 && np == stdout)
-				legacy_serial_console = index;
+			if (of_device_is_available(np)) {
+				index = add_legacy_soc_port(np, np);
+				if (index >= 0 && np == stdout)
+					legacy_serial_console = index;
+			}
 		}
 		of_node_put(parent);
 	}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 301db65f05a1..84daabe2fcba 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
 /*
  * Methods used to fetch LPAR data when running on a pSeries platform.
  */
-/**
- * h_get_mpp
- * H_GET_MPP hcall returns info in 7 parms
- */
-int h_get_mpp(struct hvcall_mpp_data *mpp_data)
-{
-	int rc;
-	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
-	rc = plpar_hcall9(H_GET_MPP, retbuf);
-
-	mpp_data->entitled_mem = retbuf[0];
-	mpp_data->mapped_mem = retbuf[1];
-
-	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
-	mpp_data->pool_num = retbuf[2] & 0xffff;
-
-	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
-	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
-	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
-
-	mpp_data->pool_size = retbuf[4];
-	mpp_data->loan_request = retbuf[5];
-	mpp_data->backing_mem = retbuf[6];
-
-	return rc;
-}
-EXPORT_SYMBOL(h_get_mpp);
 
 struct hvcall_ppp_data {
 	u64	entitlement;
@@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m)
 	seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
 }
 
+/**
+ * parse_mpp_x_data - print h_get_mpp_x() data to @m (pool fields if non-zero)
+ * No output without FW_FEATURE_XCMO or if h_get_mpp_x() returns non-zero.
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO))
+		return;
+	if (h_get_mpp_x(&mpp_x_data))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+	if (mpp_x_data.pool_coalesced_bytes)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   mpp_x_data.pool_coalesced_bytes);
+	if (mpp_x_data.pool_purr_cycles)
+		seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+	if (mpp_x_data.pool_spurr_cycles)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
 
@@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		parse_system_parameter_string(m);
 		parse_ppp_data(m);
 		parse_mpp_data(m);
+		parse_mpp_x_data(m);
 		pseries_cmo_data(m);
 		splpar_dispatch_data(m);
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd9821ad4..402560e957bd 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -694,6 +694,17 @@ _GLOBAL(kernel_thread)
 	addi	r1,r1,16
 	blr
 
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+	/* Reset r1 to the top of the current thread's stack */
+	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r3,0
+	stw	r3,0(r1)		/* Zero the stack frame pointer (32-bit store) */
+	bl	start_secondary
+	b	.
+#endif /* CONFIG_SMP */
+	
 /*
  * This routine is just here to keep GCC happy - sigh...
  */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 206a321a71d3..e89df59cdc5a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp)
  * wait for the flag to change, indicating this kernel is going away but
  * the slave code for the next one is at addresses 0 to 100.
  *
- * This is used by all slaves.
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
  *
  * Physical (hardware) cpu id should be in r3.
  */
@@ -471,10 +472,6 @@ _GLOBAL(kexec_wait)
 1:	mflr	r5
 	addi	r5,r5,kexec_flag-1b
 
-	li	r4,KEXEC_STATE_REAL_MODE
-	stb	r4,PACAKEXECSTATE(r13)
-	SYNC
-
 99:	HMT_LOW
 #ifdef CONFIG_KEXEC		/* use no memory without kexec */
 	lwz	r4,0(r5)
@@ -499,11 +496,17 @@ kexec_flag:
  *
  * get phys id from paca
  * switch to real mode
+ * mark the paca as no longer used
  * join other cpus in kexec_wait(phys_id)
  */
 _GLOBAL(kexec_smp_wait)
 	lhz	r3,PACAHWCPUID(r13)
 	bl	real_mode
+
+	li	r4,KEXEC_STATE_REAL_MODE
+	stb	r4,PACAKEXECSTATE(r13)
+	SYNC
+
 	b	.kexec_wait
 
 /*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 10f0aadee95b..efeb88184182 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,7 +7,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/threads.h>
+#include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/memblock.h>
 
@@ -156,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 /* Put the paca pointer into r13 and SPRG_PACA */
 void setup_paca(struct paca_struct *new_paca)
 {
+	/* Setup r13 */
 	local_paca = new_paca;
-	mtspr(SPRN_SPRG_PACA, local_paca);
+
 #ifdef CONFIG_PPC_BOOK3E
+	/* On Book3E, initialize the TLB miss exception frames */
 	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+	/* In HV mode, we setup both HPACA and PACA to avoid problems
+	 * if we do a GET_PACA() before the feature fixups have been
+	 * applied
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE_206))
+		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
+	mtspr(SPRN_SPRG_PACA, local_paca);
+
 }
 
 static int __initdata paca_size;
 
 void __init allocate_pacas(void)
 {
-	int nr_cpus, cpu, limit;
+	int cpu, limit;
 
 	/*
 	 * We can't take SLB misses on the paca, and we want to access them
@@ -179,23 +190,18 @@ void __init allocate_pacas(void)
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		limit = min(limit, HvPagesToMap * HVPAGESIZE);
 
-	nr_cpus = NR_CPUS;
-	/* On iSeries we know we can never have more than 64 cpus */
-	if (firmware_has_feature(FW_FEATURE_ISERIES))
-		nr_cpus = min(64, nr_cpus);
-
-	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
+	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
 
 	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
 	memset(paca, 0, paca_size);
 
 	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
-		paca_size, nr_cpus, paca);
+		paca_size, nr_cpu_ids, paca);
 
-	allocate_lppacas(nr_cpus, limit);
+	allocate_lppacas(nr_cpu_ids, limit);
 
 	/* Can't use for_each_*_cpu, as they aren't functional yet */
-	for (cpu = 0; cpu < nr_cpus; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		initialise_paca(&paca[cpu], cpu);
 }
 
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index d225d99fe39d..6baabc13306a 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 	const u32 *regs;
 	struct pci_dn *pdn;
 
-	pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+	pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
 	if (pdn == NULL)
 		return NULL;
-	memset(pdn, 0, sizeof(*pdn));
 	dn->data = pdn;
 	pdn->node = dn;
 	pdn->phb = phb;
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index c4063b7f49a0..822f63008ae1 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 	return 0;
 }
 
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+	u64 delta = (val - prev) & 0xfffffffful;
+
+	/*
+	 * The counters are only 32 bits wide, hence the mask above.  POWER7
+	 * can also roll back counter values: the new value is then smaller
+	 * than the previous one, which would yield a bogus delta unless the
+	 * counter really rolled over.  A rolled-back counter is smaller by
+	 * less than 256, the maximum number of events rolled back at once.
+	 * If we detect a rollback, return 0.  This can lead to a small lack
+	 * of precision in the counters.
+	 */
+	if (prev > val && (prev - val) < 256)
+		delta = 0;
+
+	return delta;
+}
+
 static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
@@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event)
 		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
+		delta = check_and_compute_delta(prev, val);
+		if (!delta)
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-	/* The counters are only 32 bits wide */
-	delta = (val - prev) & 0xfffffffful;
 	local64_add(delta, &event->count);
 	local64_sub(delta, &event->hw.period_left);
 }
@@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
 		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
-		delta = (val - prev) & 0xfffffffful;
-		local64_add(delta, &event->count);
+		delta = check_and_compute_delta(prev, val);
+		if (delta)
+			local64_add(delta, &event->count);
 	}
 }
 
@@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 				  unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
-	u64 val;
+	u64 val, prev;
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		local64_set(&event->hw.prev_count, val);
+		prev = local64_read(&event->hw.prev_count);
+		if (check_and_compute_delta(prev, val))
+			local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
-	delta = (val - prev) & 0xfffffffful;
+	delta = check_and_compute_delta(prev, val);
 	local64_add(delta, &event->count);
 
 	/*
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef3ef566235e..7d28f540200c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs);
 extern int sys_sigreturn(struct pt_regs *regs);
 
 EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
@@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(copy_4K_page);
-#endif
+EXPORT_SYMBOL(copy_page);
 
 #if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
 EXPORT_SYMBOL(isa_io_base);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f74f355a9617..095043d79946 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk)
 /*
  * Copy a thread..
  */
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long unused, struct task_struct *p,
 		struct pt_regs *regs)
@@ -755,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 				_ALIGN_UP(sizeof(struct thread_info), 16);
 
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB)) {
+	if (mmu_has_feature(MMU_FTR_SLB)) {
 		unsigned long sp_vsid;
 		unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
 
-		if (cpu_has_feature(CPU_FTR_1T_SEGMENT))
+		if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
 				<< SLB_VSID_SHIFT_1T;
 		else
@@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		p->thread.ksp_vsid = sp_vsid;
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64 
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		if (current->thread.dscr_inherit) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = current->thread.dscr;
+		} else if (0 != dscr_default) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = dscr_default;
+		} else {
+			p->thread.dscr_inherit = 0;
+			p->thread.dscr = 0;
+		}
+	}
+#endif
 
 	/*
 	 * The PPC64 ABI makes use of a TOC to contain function 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e74fa12afc82..48aeb55faae9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -68,6 +68,7 @@ int __initdata iommu_force_on;
 unsigned long tce_alloc_start, tce_alloc_end;
 u64 ppc64_rma_size;
 #endif
+static phys_addr_t first_memblock_size;
 
 static int __init early_parse_mem(char *p)
 {
@@ -123,18 +124,19 @@ static void __init move_device_tree(void)
  */
 static struct ibm_pa_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
+	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
 	unsigned char	pabyte;		/* byte number in ibm,pa-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-	{0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
-	{0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{CPU_FTR_SLB, 0,		0, 2, 0},
-	{CPU_FTR_CTRL, 0,		0, 3, 0},
-	{CPU_FTR_NOEXECUTE, 0,		0, 6, 0},
-	{CPU_FTR_NODSISRALIGN, 0,	1, 1, 1},
-	{CPU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
+	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
+	{0, MMU_FTR_SLB, 0,		0, 2, 0},
+	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
+	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
 	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 };
 
@@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
 		if (bit ^ fp->invert) {
 			cur_cpu_spec->cpu_features |= fp->cpu_features;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features |= fp->mmu_features;
 		} else {
 			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
 			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
 		}
 	}
 }
@@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
 {
-	static int logical_cpuid = 0;
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 	const u32 *prop;
 	const u32 *intserv;
 	int i, nthreads;
 	unsigned long len;
-	int found = 0;
+	int found = -1;
+	int found_thread = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * version 2 of the kexec param format adds the phys cpuid of
 		 * booted proc.
 		 */
-		if (initial_boot_params && initial_boot_params->version >= 2) {
-			if (intserv[i] ==
-					initial_boot_params->boot_cpuid_phys) {
-				found = 1;
-				break;
+		if (initial_boot_params->version >= 2) {
+			if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+				found = boot_cpu_count;
+				found_thread = i;
 			}
 		} else {
 			/*
@@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 			 * off secondary threads.
 			 */
 			if (of_get_flat_dt_prop(node,
-					"linux,boot-cpu", NULL) != NULL) {
-				found = 1;
-				break;
-			}
+					"linux,boot-cpu", NULL) != NULL)
+				found = boot_cpu_count;
 		}
-
 #ifdef CONFIG_SMP
 		/* logical cpu id is always 0 on UP kernels */
-		logical_cpuid++;
+		boot_cpu_count++;
 #endif
 	}
 
-	if (found) {
-		DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
-			intserv[i]);
-		boot_cpuid = logical_cpuid;
-		set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+	if (found >= 0) {
+		DBG("boot cpu: logical %d physical %d\n", found,
+			intserv[found_thread]);
+		boot_cpuid = found;
+		set_hard_smp_processor_id(found, intserv[found_thread]);
 
 		/*
 		 * PAPR defines "logical" PVR values for cpus that
@@ -509,11 +509,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 			size = 0x80000000ul - base;
 	}
 #endif
-
-	/* First MEMBLOCK added, do some special initializations */
-	if (memstart_addr == ~(phys_addr_t)0)
-		setup_initial_memory_limit(base, size);
-	memstart_addr = min((u64)memstart_addr, base);
+	/* Keep track of the beginning of memory -and- the size of
+	 * the very first block in the device-tree as it represents
+	 * the RMA on ppc64 server
+	 */
+	if (base < memstart_addr) {
+		memstart_addr = base;
+		first_memblock_size = size;
+	}
 
 	/* Add the chunk to the MEMBLOCK list */
 	memblock_add(base, size);
@@ -698,6 +701,7 @@ void __init early_init_devtree(void *params)
 
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+	setup_initial_memory_limit(memstart_addr, first_memblock_size);
 
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 941ff4dbc567..c016033ba78d 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...)
 	const char *p, *q, *s;
 	va_list args;
 	unsigned long v;
+	long vs;
 	struct prom_t *_prom = &RELOC(prom);
 
 	va_start(args, format);
@@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...)
 			v = va_arg(args, unsigned long);
 			prom_print_hex(v);
 			break;
+		case 'd':
+			++q;
+			vs = va_arg(args, int);
+			if (vs < 0) {
+				prom_print(RELOC("-"));
+				vs = -vs;
+			}
+			prom_print_dec(vs);
+			break;
 		case 'l':
 			++q;
-			if (*q == 'u') { /* '%lu' */
+			if (*q == 0)
+				break;
+			else if (*q == 'x') {
+				++q;
+				v = va_arg(args, unsigned long);
+				prom_print_hex(v);
+			} else if (*q == 'u') { /* '%lu' */
 				++q;
 				v = va_arg(args, unsigned long);
 				prom_print_dec(v);
+			} else if (*q == 'd') { /* %ld */
+				++q;
+				vs = va_arg(args, long);
+				if (vs < 0) {
+					prom_print(RELOC("-"));
+					vs = -vs;
+				}
+				prom_print_dec(vs);
 			}
 			break;
 		}
@@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void)
 #endif /* CONFIG_PCI_MSI */
 #ifdef CONFIG_PPC_SMLPAR
 #define OV5_CMO			0x80	/* Cooperative Memory Overcommitment */
+#define OV5_XCMO			0x40	/* Page Coalescing */
 #else
 #define OV5_CMO			0x00
+#define OV5_XCMO			0x00
 #endif
 #define OV5_TYPE1_AFFINITY	0x80	/* Type 1 NUMA affinity */
 
@@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = {
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
 	0,
-	OV5_CMO,
+	OV5_CMO | OV5_XCMO,
 	OV5_TYPE1_AFFINITY,
 	0,
 	0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 55613e33e263..a6ae1cfad86c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(task) < 0)
+		return -ESRCH;
+
 	bp = thread->ptrace_bps[0];
 	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
 		}
+		ptrace_put_breakpoints(task);
 		return 0;
 	}
 	if (bp) {
@@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 					(DABR_DATA_WRITE | DABR_DATA_READ),
 							&attr.bp_type);
 		ret =  modify_user_hw_breakpoint(bp, &attr);
-		if (ret)
+		if (ret) {
+			ptrace_put_breakpoints(task);
 			return ret;
+		}
 		thread->ptrace_bps[0] = bp;
+		ptrace_put_breakpoints(task);
 		thread->dabr = data;
 		return 0;
 	}
@@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 							ptrace_triggered, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
+		ptrace_put_breakpoints(task);
 		return PTR_ERR(bp);
 	}
 
+	ptrace_put_breakpoints(task);
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	/* Move contents to the DABR register */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 2097f2b3cba8..271ff6318eda 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,6 +42,7 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
+#include <asm/pSeries_reconfig.h>
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -494,7 +495,7 @@ unsigned int rtas_busy_delay(int status)
 
 	might_sleep();
 	ms = rtas_busy_delay_time(status);
-	if (ms)
+	if (ms && need_resched())
 		msleep(ms);
 
 	return ms;
@@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
 
 	atomic_set(&data->error, rc);
 	start_topology_update();
+	pSeries_coalesce_init();
 
 	if (wake_when_done) {
 		atomic_set(&data->done, 1);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 21f30cb68077..79fca2651b65 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	int i;
 
 	threads_per_core = tpc;
-	threads_core_mask = CPU_MASK_NONE;
+	cpumask_clear(&threads_core_mask);
 
 	/* This implementation only supports power of 2 number of threads
 	 * for simplicity and performance
@@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	BUG_ON(tpc != (1 << threads_shift));
 
 	for (i = 0; i < tpc; i++)
-		cpu_set(i, threads_core_mask);
+		cpumask_set_cpu(i, &threads_core_mask);
 
 	printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
 	       tpc, tpc > 1 ? "s" : "");
@@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
  *                  cpu_present_mask
  *
  * Having the possible map set up early allows us to restrict allocations
- * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
  *
  * We do not initialize the online map here; cpus set their own bits in
  * cpu_online_mask as they come up.
@@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void)
 
 	DBG("smp_setup_cpu_maps()\n");
 
-	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
+	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
 		const int *intserv;
 		int j, len;
 
@@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void)
 				intserv = &cpu;	/* assume logical == phys */
 		}
 
-		for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
+		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, intserv[j]);
 			set_cpu_present(cpu, true);
@@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void)
 		if (cpu_has_feature(CPU_FTR_SMT))
 			maxcpus *= nthreads;
 
-		if (maxcpus > NR_CPUS) {
+		if (maxcpus > nr_cpu_ids) {
 			printk(KERN_WARNING
 			       "Partition configured for %d cpus, "
 			       "operating system maximum is %d.\n",
-			       maxcpus, NR_CPUS);
-			maxcpus = NR_CPUS;
+			       maxcpus, nr_cpu_ids);
+			maxcpus = nr_cpu_ids;
 		} else
 			printk(KERN_INFO "Partition configured for %d cpus.\n",
 			       maxcpus);
@@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void)
 	cpu_init_thread_core_maps(nthreads);
 
 	/* Now that possible cpus are set, set nr_cpu_ids for later use */
-	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+	setup_nr_cpu_ids();
 
 	free_unused_pacas();
 }
@@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port)
 		 * name instead */
 		if (!np)
 			np = of_find_node_by_name(NULL, "8042");
+		if (np) {
+			of_i8042_kbd_irq = 1;
+			of_i8042_aux_irq = 12;
+		}
 		break;
 	case FDC_BASE: /* FDC1 */
 		np = of_find_node_by_type(NULL, "fdc");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1d2fbc905303..620d792b52e4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys);
 
 int boot_cpuid = -1;
 EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_cpu_count;
 int boot_cpuid_phys;
 
 int smp_hw_index[NR_CPUS];
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5a0401fcaebd..a88bf2713d41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -62,6 +62,7 @@
 #include <asm/udbg.h>
 #include <asm/kexec.h>
 #include <asm/mmu_context.h>
+#include <asm/code-patching.h>
 
 #include "setup.h"
 
@@ -72,6 +73,7 @@
 #endif
 
 int boot_cpuid = 0;
+int __initdata boot_cpu_count;
 u64 ppc64_pft_size;
 
 /* Pick defaults since we might want to patch instructions
@@ -233,6 +235,7 @@ void early_setup_secondary(void)
 void smp_release_cpus(void)
 {
 	unsigned long *ptr;
+	int i;
 
 	DBG(" -> smp_release_cpus()\n");
 
@@ -245,7 +248,16 @@ void smp_release_cpus(void)
 	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
 			- PHYSICAL_START);
 	*ptr = __pa(generic_secondary_smp_init);
-	mb();
+
+	/* And wait a bit for them to catch up */
+	for (i = 0; i < 100000; i++) {
+		mb();
+		HMT_low();
+		if (boot_cpu_count == 0)
+			break;
+		udelay(1);
+	}
+	DBG("boot_cpu_count = %d\n", boot_cpu_count);
 
 	DBG(" <- smp_release_cpus()\n");
 }
@@ -423,17 +435,30 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
-static u64 slb0_limit(void)
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guaranteed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
 {
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+#ifdef CONFIG_PPC_BOOK3E
+	/* Freescale BookE bolts the entire linear mapping */
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		return linear_map_top;
+	/* Other BookE, we assume the first GB is bolted */
+	return 1ul << 30;
+#else
+	/* BookS, the first segment is bolted */
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return 1UL << SID_SHIFT_1T;
-	}
 	return 1UL << SID_SHIFT;
+#endif
 }
 
 static void __init irqstack_early_init(void)
 {
-	u64 limit = slb0_limit();
+	u64 limit = safe_stack_limit();
 	unsigned int i;
 
 	/*
@@ -453,6 +478,9 @@ static void __init irqstack_early_init(void)
 #ifdef CONFIG_PPC_BOOK3E
 static void __init exc_lvl_early_init(void)
 {
+	extern unsigned int interrupt_base_book3e;
+	extern unsigned int exc_debug_debug_book3e;
+
 	unsigned int i;
 
 	for_each_possible_cpu(i) {
@@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void)
 		mcheckirq_ctx[i] = (struct thread_info *)
 			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
+
+	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+		patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
+			     (unsigned long)&exc_debug_debug_book3e, 0);
 }
 #else
 #define exc_lvl_early_init()
@@ -486,7 +518,7 @@ static void __init emergency_stack_init(void)
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
 	 */
-	limit = min(slb0_limit(), ppc64_rma_size);
+	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		unsigned long sp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 27c4a4584f80..da989fff19cc 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -381,7 +381,7 @@ badframe:
 	       regs, uc, &uc->uc_mcontext);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "rt_sigreturn",
 			(long)uc, regs->nip, regs->link);
 
@@ -469,7 +469,7 @@ badframe:
 	       regs, frame, newsp);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "setup_rt_frame",
 			(long)frame, regs->nip, regs->link);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b0..4a6f2ec7e761 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1;
 static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
 #ifdef CONFIG_PPC64
-void __devinit smp_generic_kick_cpu(int nr)
+int __devinit smp_generic_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
@@ -106,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr)
 	 */
 	paca[nr].cpu_start = 1;
 	smp_mb();
-}
-#endif
 
-void smp_message_recv(int msg)
-{
-	switch(msg) {
-	case PPC_MSG_CALL_FUNCTION:
-		generic_smp_call_function_interrupt();
-		break;
-	case PPC_MSG_RESCHEDULE:
-		/* we notice need_resched on exit */
-		break;
-	case PPC_MSG_CALL_FUNC_SINGLE:
-		generic_smp_call_function_single_interrupt();
-		break;
-	case PPC_MSG_DEBUGGER_BREAK:
-		if (crash_ipi_function_ptr) {
-			crash_ipi_function_ptr(get_irq_regs());
-			break;
-		}
-#ifdef CONFIG_DEBUGGER
-		debugger_ipi(get_irq_regs());
-		break;
-#endif /* CONFIG_DEBUGGER */
-		/* FALLTHROUGH */
-	default:
-		printk("SMP %d: smp_message_recv(): unknown msg %d\n",
-		       smp_processor_id(), msg);
-		break;
-	}
+	return 0;
 }
+#endif
 
 static irqreturn_t call_function_action(int irq, void *data)
 {
@@ -146,7 +119,7 @@ static irqreturn_t call_function_action(int irq, void *data)
 
 static irqreturn_t reschedule_action(int irq, void *data)
 {
-	/* we just need the return path side effect of checking need_resched */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -156,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t debug_ipi_action(int irq, void *data)
+irqreturn_t debug_ipi_action(int irq, void *data)
 {
-	smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+	if (crash_ipi_function_ptr) {
+		crash_ipi_function_ptr(get_irq_regs());
+		return IRQ_HANDLED;
+	}
+
+#ifdef CONFIG_DEBUGGER
+	debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
 	return IRQ_HANDLED;
 }
 
@@ -197,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg)
 	return err;
 }
 
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+	int messages;			/* current messages */
+	unsigned long data;		/* data for cause ipi */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+	info->data = data;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+	char *message = (char *)&info->messages;
+
+	message[msg] = 1;
+	mb();
+	smp_ops->cause_ipi(cpu, info->data);
+}
+
+void smp_muxed_ipi_resend(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+
+	if (info->messages)
+		smp_ops->cause_ipi(smp_processor_id(), info->data);
+}
+
+irqreturn_t smp_ipi_demux(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+	unsigned int all;
+
+	mb();	/* order any irq clear */
+
+	do {
+		all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+			generic_smp_call_function_interrupt();
+		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+			scheduler_ipi();
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+			generic_smp_call_function_single_interrupt();
+		if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+			debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
+#endif
+	} while (info->messages);
+
+	return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
+
 void smp_send_reschedule(int cpu)
 {
 	if (likely(smp_ops))
@@ -216,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
 {
-	if (likely(smp_ops))
-		smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+	int cpu;
+	int me = raw_smp_processor_id();
+
+	if (unlikely(!smp_ops))
+		return;
+
+	for_each_online_cpu(cpu)
+		if (cpu != me)
+			smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
 }
 #endif
 
@@ -228,9 +276,9 @@ void smp_send_debugger_break(int cpu)
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 	crash_ipi_function_ptr = crash_ipi_callback;
-	if (crash_ipi_callback && smp_ops) {
+	if (crash_ipi_callback) {
 		mb();
-		smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
+		smp_send_debugger_break();
 	}
 }
 #endif
@@ -410,8 +458,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int rc, c;
 
-	secondary_ti = current_set[cpu];
-
 	if (smp_ops == NULL ||
 	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
@@ -421,6 +467,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	if (rc)
 		return rc;
 
+	secondary_ti = current_set[cpu];
+
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
 	 * hotplug
 	 */
@@ -434,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	/* wake up cpus */
 	DBG("smp: kicking cpu %d\n", cpu);
-	smp_ops->kick_cpu(cpu);
+	rc = smp_ops->kick_cpu(cpu);
+	if (rc) {
+		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+		return rc;
+	}
 
 	/*
 	 * wait to see if the cpu made a callin (is actually up).
@@ -507,7 +559,7 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
-/* Must be called when no change can occur to cpu_present_map,
+/* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
  */
 static struct device_node *cpu_to_l2cache(int cpu)
@@ -608,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	 * se we pin us down to CPU 0 for a short while
 	 */
 	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
-	cpumask_copy(old_mask, &current->cpus_allowed);
+	cpumask_copy(old_mask, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
 	
 	if (smp_ops && smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c0d8c2006bf4..f0f2199e64e1 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
 static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
 static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+
+unsigned long dscr_default = 0;
+EXPORT_SYMBOL(dscr_default);
+
+static ssize_t show_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lx\n", dscr_default);
+}
+
+static ssize_t __used store_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int ret = 0;
+	
+	ret = sscanf(buf, "%lx", &val);
+	if (ret != 1)
+		return -EINVAL;
+	dscr_default = val;
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(dscr_default, 0600,
+		show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+	int err = 0;
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			&attr_dscr_default.attr);
+}
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
@@ -617,6 +652,9 @@ static int __init topology_init(void)
 		if (cpu_online(cpu))
 			register_cpu_online(cpu);
 	}
+#ifdef CONFIG_PPC64
+	sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 375480c56eb9..f33acfd872ad 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb)
 	u64 stolen = 0;
 	u64 dtb;
 
+	if (!dtl)
+		return 0;
+
 	if (i == vpa->dtl_idx)
 		return 0;
 	while (i < vpa->dtl_idx) {
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5ddb801bc154..b13306b0d925 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err)
 #endif
 		printk("%s\n", ppc_md.name ? ppc_md.name : "");
 
-		sysfs_printk_last_file();
 		if (notify_die(DIE_OOPS, str, regs, err, 255,
 			       SIGSEGV) == NOTIFY_STOP)
 			return 1;
@@ -199,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	} else if (show_unhandled_signals &&
 		    unhandled_signal(current, signr) &&
 		    printk_ratelimit()) {
-			printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+			printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 				current->comm, current->pid, signr,
 				addr, regs->nip, regs->link, code);
 		}
@@ -221,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs)
 	}
 
 #ifdef CONFIG_KEXEC
-	cpu_set(smp_processor_id(), cpus_in_sr);
+	cpumask_set_cpu(smp_processor_id(), &cpus_in_sr);
 #endif
 
 	die("System Reset", regs, SIGABRT);
@@ -909,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs)
 		return emulate_isel(regs, instword);
 	}
 
+#ifdef CONFIG_PPC64
+	/* Emulate the mfspr rD, DSCR. */
+	if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mfdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		regs->gpr[rd] = mfspr(SPRN_DSCR);
+		return 0;
+	}
+	/* Emulate the mtspr DSCR, rS. */
+	if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mtdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		mtspr(SPRN_DSCR, regs->gpr[rd]);
+		current->thread.dscr_inherit = 1;
+		return 0;
+	}
+#endif
+
 	return -EINVAL;
 }
 
@@ -1506,6 +1525,10 @@ struct ppc_emulated ppc_emulated = {
 #ifdef CONFIG_VSX
 	WARN_EMULATED_SETUP(vsx),
 #endif
+#ifdef CONFIG_PPC64
+	WARN_EMULATED_SETUP(mfdscr),
+	WARN_EMULATED_SETUP(mtdscr),
+#endif
 };
 
 u32 ppc_warn_emulated;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index e39cad83c884..23d65abbedce 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,6 +62,8 @@ void __init udbg_early_init(void)
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
+	udbg_init_wsp();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index baa33a7517bc..6837f839ab78 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <asm/udbg.h>
 #include <asm/io.h>
+#include <asm/reg_a2.h>
 
 extern u8 real_readb(volatile u8 __iomem  *addr);
 extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void)
 	udbg_getc_poll = NULL;
 }
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+static void udbg_wsp_flush(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+static void udbg_wsp_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_wsp_putc('\r');
+		udbg_wsp_flush();
+		writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+static int udbg_wsp_getc(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
+			; /* wait for char */
+		return readb(&udbg_comport->rbr);
+	}
+	return -1;
+}
+
+static int udbg_wsp_getc_poll(void)
+{
+	if (udbg_comport)
+		if (readb(&udbg_comport->lsr) & LSR_DR)
+			return readb(&udbg_comport->rbr);
+	return -1;
+}
+
+void __init udbg_init_wsp(void)
+{
+	udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+
+	udbg_init_uart(udbg_comport, 57600, 50000000);
+
+	udbg_putc = udbg_wsp_putc;
+	udbg_flush = udbg_wsp_flush;
+	udbg_getc = udbg_wsp_getc;
+	udbg_getc_poll = udbg_wsp_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 9de6f396cf85..4d5a3edff49e 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec)
 	MTMSRD(r5)			/* enable use of VMX now */
 	isync
 	PPC_LCMPI	0,r3,0
-	beqlr-				/* if no previous owner, done */
+	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c961de40c676..0f95b5cce033 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
-	return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 2b9c9088d00e..1a1b34487e71 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -35,9 +35,7 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
-#define LOAD_SHADOW_VCPU(reg)				\
-	mfspr	reg, SPRN_SPRG_PACA
-
+#define LOAD_SHADOW_VCPU(reg)	GET_PACA(reg)					
 #define SHADOW_VCPU_OFF		PACA_KVM_SVCPU
 #define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
@@ -72,7 +70,7 @@
 .global kvmppc_trampoline_\intno
 kvmppc_trampoline_\intno:
 
-	mtspr	SPRN_SPRG_SCRATCH0, r13		/* Save r13 */
+	SET_SCRATCH0(r13)		/* Save r13 */
 
 	/*
 	 * First thing to do is to find out if we're coming
@@ -91,7 +89,7 @@ kvmppc_trampoline_\intno:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
+	GET_SCRATCH0(r13)			/* r13 = original r13 */
 	b	kvmppc_resume_\intno		/* Get back original handler */
 
 	/* Now we know we're handling a KVM guest */
@@ -114,6 +112,9 @@ INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+#ifdef CONFIG_PPC_BOOK3S_64
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL_HV
+#endif
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
@@ -158,7 +159,7 @@ kvmppc_handler_skip_ins:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
 	RFI
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 7c52ed0b7051..451264274b8c 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -155,14 +155,20 @@ kvmppc_handler_trampoline_exit:
 	PPC_LL	r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
 
 	/* Save guest PC and MSR */
-	mfsrr0	r3
+	andi.	r0,r12,0x2
+	beq	1f
+	mfspr	r3,SPRN_HSRR0
+	mfspr	r4,SPRN_HSRR1
+	andi.	r12,r12,0x3ffd
+	b	2f
+1:	mfsrr0	r3
 	mfsrr1	r4
-
+2:
 	PPC_STL	r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
 	PPC_STL	r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
 
 	/* Get scratch'ed off registers */
-	mfspr	r9, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r9)
 	PPC_LL	r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
 	lwz	r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac7..13b676c20d12 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,14 +6,6 @@
 
 #include <asm/system.h>
 
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	if (mem_init_done)
-		return kmalloc(size, mask);
-	else
-		return alloc_bootmem(size);
-}
-
 void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4d4eeb900486..53dcb6b1b708 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -6,6 +6,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
@@ -15,9 +16,9 @@ PPC64_CACHES:
         .tc             ppc64_caches[TC],ppc64_caches
         .section        ".text"
 
-
-_GLOBAL(copy_4K_page)
-	li	r5,4096		/* 4K page size */
+_GLOBAL(copy_page)
+	lis	r5,PAGE_SIZE@h
+	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld      r10,PPC64_CACHES@toc(r2)
 	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
index deac4d30daf4..e91615abae66 100644
--- a/arch/powerpc/lib/devres.c
+++ b/arch/powerpc/lib/devres.c
@@ -9,11 +9,11 @@
 
 #include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
 #include <linux/gfp.h>
-#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/io.h>		/* ioremap_prot() */
 #include <linux/module.h>	/* EXPORT_SYMBOL() */
 
 /**
- * devm_ioremap_prot - Managed ioremap_flags()
+ * devm_ioremap_prot - Managed ioremap_prot()
  * @dev: Generic device to remap IO address for
  * @offset: BUS offset to map
  * @size: Size of map
@@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
 	if (!ptr)
 		return NULL;
 
-	addr = ioremap_flags(offset, size, flags);
+	addr = ioremap_prot(offset, size, flags);
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ae5189ab0049..9a52349874ee 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
+#include <linux/prefetch.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -45,6 +46,18 @@ extern int do_stxvd2x(int rn, unsigned long ea);
 #endif
 
 /*
+ * Emulate the truncation of 64-bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+	if ((msr & MSR_64BIT) == 0)
+		val &= 0xffffffffUL;
+#endif
+	return val;
+}
+
+/*
  * Determine whether a conditional branch instruction would branch.
  */
 static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
@@ -90,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
 		if (instr & 0x04000000)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 #ifdef __powerpc64__
@@ -113,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
 		if ((instr & 3) == 1)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 #endif /* __powerpc64 */
 
@@ -136,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
 		if (do_update)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 /*
@@ -466,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd)
 
 	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
+	if (!(regs->msr & MSR_64BIT))
 		val = (int) val;
 #endif
 	if (val < 0)
@@ -487,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
 		++val;
 	regs->gpr[rd] = val;
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF)) {
+	if (!(regs->msr & MSR_64BIT)) {
 		val = (unsigned int) val;
 		val1 = (unsigned int) val1;
 	}
@@ -570,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		if ((instr & 2) == 0)
 			imm += regs->nip;
 		regs->nip += 4;
-		if ((regs->msr & MSR_SF) == 0)
-			regs->nip &= 0xffffffffUL;
+		regs->nip = truncate_if_32bit(regs->msr, regs->nip);
 		if (instr & 1)
 			regs->link = regs->nip;
 		if (branch_taken(instr, regs))
@@ -604,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			imm -= 0x04000000;
 		if ((instr & 2) == 0)
 			imm += regs->nip;
-		if (instr & 1) {
-			regs->link = regs->nip + 4;
-			if ((regs->msr & MSR_SF) == 0)
-				regs->link &= 0xffffffffUL;
-		}
-		if ((regs->msr & MSR_SF) == 0)
-			imm &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+		imm = truncate_if_32bit(regs->msr, imm);
 		regs->nip = imm;
 		return 1;
 	case 19:
@@ -618,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
 			imm = (instr & 0x400)? regs->ctr: regs->link;
-			regs->nip += 4;
-			if ((regs->msr & MSR_SF) == 0) {
-				regs->nip &= 0xffffffffUL;
-				imm &= 0xffffffffUL;
-			}
+			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+			imm = truncate_if_32bit(regs->msr, imm);
 			if (instr & 1)
 				regs->link = regs->nip;
 			if (branch_taken(instr, regs))
@@ -1616,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 0;	/* invoke DSI if -EFAULT? */
 	}
  instr_done:
-	regs->nip += 4;
-#ifdef __powerpc64__
-	if ((regs->msr & MSR_SF) == 0)
-		regs->nip &= 0xffffffffUL;
-#endif
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 	return 1;
 
  logical_done:
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 5b7dd4ea02b5..a242b5d7cbe4 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
@@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
 	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
@@ -715,7 +715,7 @@ BEGIN_FTR_SECTION
 	andi.	r0,r31,_PAGE_NO_CACHE
 	/* If so, bail out and refault as a 4k page */
 	bne-	ht64_bail_ok
-END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
 	/* Prepare new PTE value (turn access RW into DIRTY, then
 	 * add BUSY and ACCESSED)
 	 */
@@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 784a400e0781..dfd764896db0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	case MMU_PAGE_4K:
 		va &= ~0xffful;
 		va |= ssize << 8;
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	default:
@@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va |= penc << 12;
 		va |= ssize << 8;
 		va |= 1; /* L */
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	}
@@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
 
 static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 {
-	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 	if (use_local)
 		use_local = mmu_psize_defs[psize].tlbiel;
@@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		} pte_iterate_hashed_end();
 	}
 
-	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
@@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		}
 		asm volatile("ptesync":::"memory");
 	} else {
-		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 58a022d0f463..26b2872b3d00 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -53,6 +53,7 @@
 #include <asm/sections.h>
 #include <asm/spu.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 	for (; size >= 4; size -= 4, ++prop) {
 		if (prop[0] == 40) {
 			DBG("1T segment support detected\n");
-			cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
+			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
 			return 1;
 		}
 	}
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
 	return 0;
 }
 
@@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	if (prop != NULL) {
 		DBG("Page sizes from device-tree:\n");
 		size /= 4;
-		cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
 			unsigned int shift = prop[0];
 			unsigned int slbenc = prop[1];
@@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 				break;
 			case 0x18:
 				idx = MMU_PAGE_16M;
-				cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+				cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
 				break;
 			case 0x22:
 				idx = MMU_PAGE_16G;
@@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void)
 	 * Not in the device-tree, let's fallback on known size
 	 * list for 16M capable GP & GR
 	 */
-	if (cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (mmu_has_feature(MMU_FTR_16M_PAGE))
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
  found:
@@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void)
 		mmu_vmalloc_psize = MMU_PAGE_64K;
 		if (mmu_linear_psize == MMU_PAGE_4K)
 			mmu_linear_psize = MMU_PAGE_64K;
-		if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) {
+		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
 			/*
 			 * Don't use 64k pages for ioremap on pSeries, since
 			 * that would stop us accessing the HEA ethernet.
@@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static inline void make_bl(unsigned int *insn_addr, void *func)
-{
-	unsigned long funcp = *((unsigned long *)func);
-	int offset = funcp - (unsigned long)insn_addr;
-
-	*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
-}
+#define FUNCTION_TEXT(A)	((*(unsigned long *)(A)))
 
 static void __init htab_finish_init(void)
 {
@@ -570,16 +563,33 @@ static void __init htab_finish_init(void)
 	extern unsigned int *ht64_call_hpte_remove;
 	extern unsigned int *ht64_call_hpte_updatepp;
 
-	make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(ht64_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
+
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 
-	make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(htab_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
 }
 
 static void __init htab_initialize(void)
@@ -598,7 +608,7 @@ static void __init htab_initialize(void)
 	/* Initialize page sizes */
 	htab_init_page_sizes();
 
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
 		printk(KERN_INFO "Using 1TB segments\n");
@@ -739,7 +749,7 @@ void __init early_init_mmu(void)
 
 	/* Initialize stab / SLB management except on iSeries
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
 		stab_initialize(get_paca()->stab_real);
@@ -756,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void)
 	 * in real mode on pSeries and we want a virtual address on
 	 * iSeries anyway
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else
 		stab_initialize(get_paca()->stab_addr);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9bb249c3046e..0b9a5c1901b9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 2535828aa84b..3bafc3deca6d 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -20,9 +20,205 @@
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_PPC_ICSWX
+/*
+ * The processor and its L2 cache cause the icswx instruction to
+ * generate a COP_REQ transaction on PowerBus. The transaction has
+ * no address, and the processor does not perform an MMU access
+ * to authenticate the transaction. The command portion of the
+ * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
+ * the coprocessor Process ID (PID), which the coprocessor compares
+ * to the authorized LPID and PID held in the coprocessor, to determine
+ * if the process is authorized to generate the transaction.
+ * The data of the COP_REQ transaction is 128-byte or less and is
+ * placed in cacheable memory on a 128-byte cache line boundary.
+ *
+ * A task that wants to use a coprocessor should call use_cop() to
+ * allocate a coprocessor PID before executing the icswx instruction.
+ * use_cop() also enables coprocessor context switching. drop_cop()
+ * is used to free the coprocessor PID.
+ *
+ * Example:
+ * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
+ * Each HFI has multiple windows. Each HFI window serves as a
+ * network device sending to and receiving from the HFI network.
+ * The HFI immediate send function uses the icswx instruction. The immediate
+ * send function allows small (single cache-line) packets to be sent
+ * without using the regular HFI send FIFO and doorbell, which are
+ * much slower than immediate send.
+ *
+ * For each task intending to use HFI immediate send, the HFI driver
+ * calls use_cop() to obtain a coprocessor PID for the task.
+ * The HFI driver then allocates a free HFI window and saves the
+ * coprocessor PID to the HFI window to allow the task to use the
+ * HFI window.
+ *
+ * The HFI driver repeatedly creates immediate send packets and
+ * issues the icswx instruction to send data through the HFI window.
+ * The HFI compares the coprocessor PID in the CPU PID register
+ * to the PID held in the HFI window to determine if the transaction
+ * is allowed.
+ *
+ * When the task releases the HFI window, the HFI driver calls
+ * drop_cop() to release the coprocessor PID.
+ */
+
+#define COP_PID_NONE 0
+#define COP_PID_MIN (COP_PID_NONE + 1)
+#define COP_PID_MAX (0xFFFF)
+
+static DEFINE_SPINLOCK(mmu_context_acop_lock);
+static DEFINE_IDA(cop_ida);
+
+void switch_cop(struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.cop_pid);
+	mtspr(SPRN_ACOP, next->context.acop);
+}
+
+static int new_cop_pid(struct ida *ida, int min_id, int max_id,
+		       spinlock_t *lock)
+{
+	int index;
+	int err;
+
+again:
+	if (!ida_pre_get(ida, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(lock);
+	err = ida_get_new_above(ida, min_id, &index);
+	spin_unlock(lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > max_id) {
+		spin_lock(lock);
+		ida_remove(ida, index);
+		spin_unlock(lock);
+		return -ENOMEM;
+	}
+
+	return index;
+}
+
+static void sync_cop(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (mm == current->active_mm)
+		switch_cop(current->active_mm);
+}
+
+/**
+ * use_cop - Start using a coprocessor.
+ * @acop: mask of coprocessor to be used.
+ * @mm: The mm to associate the coprocessor with. Most likely current mm.
+ *
+ * Return a positive PID if successful. Negative errno otherwise.
+ * The returned PID will be fed to the coprocessor to determine if an
+ * icswx transaction is authenticated.
+ */
+int use_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return -ENODEV;
+
+	if (!mm || !acop)
+		return -EINVAL;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	if (mm->context.cop_pid == COP_PID_NONE) {
+		ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
+				  &mmu_context_acop_lock);
+		if (ret < 0)
+			goto out;
+
+		mm->context.cop_pid = ret;
+	}
+	mm->context.acop |= acop;
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	ret = mm->context.cop_pid;
+
+out:
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(use_cop);
+
+/**
+ * drop_cop - Stop using a coprocessor.
+ * @acop: mask of coprocessor to be stopped.
+ * @mm: The mm the coprocessor is associated with.
+ */
+void drop_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int free_pid = COP_PID_NONE;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return;
+
+	if (WARN_ON_ONCE(!mm))
+		return;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	mm->context.acop &= ~acop;
+
+	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
+		free_pid = mm->context.cop_pid;
+		mm->context.cop_pid = COP_PID_NONE;
+	}
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	if (free_pid != COP_PID_NONE) {
+		spin_lock(&mmu_context_acop_lock);
+		ida_remove(&cop_ida, free_pid);
+		spin_unlock(&mmu_context_acop_lock);
+	}
+
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(drop_cop);
+
+#endif /* CONFIG_PPC_ICSWX */
+
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
@@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida);
  * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
  * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
  */
-#define NO_CONTEXT	0
 #define MAX_CONTEXT	((1UL << 19) - 1)
 
 int __init_new_context(void)
@@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 		slice_set_user_psize(mm, mmu_virtual_psize);
 	subpage_prot_init_new_context(mm);
 	mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (!mm->context.cop_lockp) {
+		__destroy_context(index);
+		subpage_prot_free(mm);
+		mm->context.id = MMU_NO_CONTEXT;
+		return -ENOMEM;
+	}
+	spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
 
 	return 0;
 }
@@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context);
 
 void destroy_context(struct mm_struct *mm)
 {
+#ifdef CONFIG_PPC_ICSWX
+	drop_cop(mm->context.acop, mm);
+	kfree(mm->context.cop_lockp);
+	mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
-	mm->context.id = NO_CONTEXT;
+	mm->context.id = MMU_NO_CONTEXT;
 }
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index c0aab52da3a5..336807de550e 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 		return NOTIFY_OK;
 
 	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
 		pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
 		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
@@ -407,7 +409,17 @@ void __init mmu_context_init(void)
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
 		first_context = 1;
 		last_context = 65535;
-	} else {
+	} else
+#ifdef CONFIG_PPC_BOOK3E_MMU
+	if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
+		u32 mmucfg = mfspr(SPRN_MMUCFG);
+		u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
+				>> MMUCFG_PIDSIZE_SHIFT;
+		first_context = 1;
+		last_context = (1UL << (pid_bits + 1)) - 1;
+	} else
+#endif
+	{
 		first_context = 1;
 		last_context = 255;
 	}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ec1dad2a19d..2164006fe170 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
 static int __init find_min_common_depth(void)
 {
 	int depth;
-	struct device_node *rtas_root;
 	struct device_node *chosen;
+	struct device_node *root;
 	const char *vec5;
 
-	rtas_root = of_find_node_by_path("/rtas");
-
-	if (!rtas_root)
-		return -1;
+	root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
 
 	/*
 	 * This property is a set of 32-bit integers, each representing
@@ -332,7 +331,7 @@ static int __init find_min_common_depth(void)
 	 * NUMA boundary and the following are progressively less significant
 	 * boundaries. There can be more than one level of NUMA.
 	 */
-	distance_ref_points = of_get_property(rtas_root,
+	distance_ref_points = of_get_property(root,
 					"ibm,associativity-reference-points",
 					&distance_ref_points_depth);
 
@@ -376,11 +375,11 @@ static int __init find_min_common_depth(void)
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return depth;
 
 err:
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return -1;
 }
 
@@ -1453,7 +1452,7 @@ int arch_update_cpu_topology(void)
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	struct sys_device *sysdev;
 
-	for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+	for_each_cpu(cpu,&cpu_associativity_changes_mask) {
 		vphn_get_associativity(cpu, associativity);
 		nid = associativity_to_nid(associativity);
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8dc41c0157fe..51f87956f8f8 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size)
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
-ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
+ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *
+ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
 	/* writeable implies dirty for kernel addresses */
 	if (flags & _PAGE_RW)
@@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 
 	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_prot);
 
 void __iomem *
 __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 88927a05cdc2..6e595f6496d4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -255,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 	return __ioremap_caller(addr, size, flags, caller);
 }
 
-void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
+void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	unsigned long flags = _PAGE_NO_CACHE;
+	void *caller = __builtin_return_address(0);
+
+	if (ppc_md.ioremap)
+		return ppc_md.ioremap(addr, size, flags, caller);
+	return __ioremap_caller(addr, size, flags, caller);
+}
+
+void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
 	void *caller = __builtin_return_address(0);
@@ -311,7 +321,8 @@ void iounmap(volatile void __iomem *token)
 }
 
 EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_wc);
+EXPORT_SYMBOL(ioremap_prot);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(__ioremap_at);
 EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 1d98ecc8eecd..e22276cb67a4 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -24,6 +24,7 @@
 #include <asm/firmware.h>
 #include <linux/compiler.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 
 extern void slb_allocate_realmode(unsigned long ea);
@@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
 	int esid_1t_count;
 
 	/* System is not 1T segment size capable. */
-	if (!cpu_has_feature(CPU_FTR_1T_SEGMENT))
+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return (GET_ESID(addr1) == GET_ESID(addr2));
 
 	esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
@@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	hard_irq_disable();
 	offset = get_paca()->slb_cache_ptr;
-	if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
+	if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
 	    offset <= SLB_CACHE_ENTRIES) {
 		int i;
 		asm volatile("isync" : : : "memory");
@@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
 				      unsigned int immed)
 {
-	*insn_addr = (*insn_addr & 0xffff0000) | immed;
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
+	int insn = (*insn_addr & 0xffff0000) | immed;
+	patch_instruction(insn_addr, insn);
 }
 
 void slb_set_size(u16 size)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 95ce35581696..ef653dc95b65 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear)
 	li	r11,0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 1:
@@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
 6:
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 0:	/* user address: proto-VSID = context << 15 | ESID. First check
@@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
 	ld	r9,PACACONTEXTID(r13)
 BEGIN_FTR_SECTION
 	cmpldi	r10,0x1000
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	rldimi	r10,r9,USER_ESID_BITS,0
 BEGIN_FTR_SECTION
 	bge	slb_finish_load_1T
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load
 
 8:	/* invalid EA */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 446a01842a73..41e31642a86a 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -243,7 +243,7 @@ void __init stabs_alloc(void)
 {
 	int cpu;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		return;
 
 	for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index aa46e9d1e771..19395f18b1db 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
 	mpic_setup_this_cpu();
 }
 
-static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
 {
 	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
 	const u64 *spin_table_addr_prop;
@@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 					       NULL);
 	if (spin_table_addr_prop == NULL) {
 		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
-		return;
+		return -ENOENT;
 	}
 
 	/* Assume it's mapped as part of the linear mapping. This is a bit
@@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 	smp_wmb();
 	spin_table[1] = __pa(start_secondary_47x);
 	mb();
+
+	return 0;
 }
 
 static struct smp_ops_t iss_smp_ops = {
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index cfc4b2009982..9f09319352c0 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq)
 static void
 cpld_mask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
@@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d)
 static void
 cpld_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
@@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
 	status |= (ignore | mask);
 
 	if (status == 0xff)
-		return NO_IRQ_IGNORE;
+		return NO_IRQ;
 
 	cpld_irq = ffz(status) + offset;
 
@@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
 {
 	irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
 		&cpld_regs->pci_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
 
 	irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
 		&cpld_regs->misc_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 57a6a349e932..96f85e5e0cd3 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq);
+	val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
@@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq));
+	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 1dd15400f6f0..1a9a49570579 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
  */
 static void mpc52xx_extirq_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 27-l2irq);
 }
 
 static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	u32 ctrl_reg, type;
-	int irq;
-	int l2irq;
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	void *handler = handle_level_irq;
 
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
-	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type);
+	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__,
+		(int) irqd_to_hwirq(d), l2irq, flow_type);
 
 	switch (flow_type) {
 	case IRQF_TRIGGER_HIGH: type = 0; break;
@@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
 
 static void mpc52xx_main_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->main_mask, 16 - l2irq);
 }
 
 static void mpc52xx_main_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->main_mask, 16 - l2irq);
 }
 
@@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = {
  */
 static void mpc52xx_periph_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->per_mask, 31 - l2irq);
 }
 
 static void mpc52xx_periph_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->per_mask, 31 - l2irq);
 }
 
@@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = {
  */
 static void mpc52xx_sdma_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	out_be32(&sdma->IntPend, 1 << l2irq);
 }
 
@@ -539,7 +486,7 @@ void __init mpc52xx_init_irq(void)
 unsigned int mpc52xx_get_irq(void)
 {
 	u32 status;
-	int irq = NO_IRQ_IGNORE;
+	int irq;
 
 	status = in_be32(&intr->enc_status);
 	if (status & 0x00000400) {	/* critical */
@@ -562,6 +509,8 @@ unsigned int mpc52xx_get_irq(void)
 		} else {
 			irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
 		}
+	} else {
+		return NO_IRQ;
 	}
 
 	return irq_linear_revmap(mpc52xx_irqhost, irq);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 4a4eb6ffa12f..8ccf9ed62fe2 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -42,7 +42,7 @@ struct pq2ads_pci_pic {
 static void pq2ads_pci_mask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
@@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d)
 static void pq2ads_pci_unmask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
@@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void pci_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* remove chip and handler */
-	irq_set_chip_data(virq, NULL);
-	irq_set_chip(virq, NULL);
-}
-
 static struct irq_host_ops pci_pic_host_ops = {
 	.map = pci_pic_host_map,
-	.unmap = pci_host_unmap,
 };
 
 int __init pq2ads_pci_init_irq(void)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 188272934cfb..104faa8aa23c 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
 	.end = mpc83xx_suspend_end,
 };
 
+static struct of_device_id pmc_match[];
 static int pmc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource res;
 	struct pmc_type *type;
 	int ret = 0;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(pmc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
-	type = ofdev->dev.of_match->data;
+	type = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 0d00ff9d05a0..d6a93a10c0f5 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -41,7 +41,7 @@ extern void __early_start(void);
 #define NUM_BOOT_ENTRY		8
 #define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
 
-static void __init
+static int __init
 smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
@@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr)
 
 	if (cpu_rel_addr == NULL) {
 		printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
-		return;
+		return -ENOENT;
 	}
 
 	/*
@@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr)
 		iounmap(bptr_vaddr);
 
 	pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
+
+	return 0;
 }
 
 static void __init
@@ -233,8 +235,10 @@ void __init mpc85xx_smp_init(void)
 		smp_85xx_ops.message_pass = smp_mpic_message_pass;
 	}
 
-	if (cpu_has_feature(CPU_FTR_DBELL))
-		smp_85xx_ops.message_pass = doorbell_message_pass;
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass;
+		smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+	}
 
 	BUG_ON(!smp_85xx_ops.message_pass);
 
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index db864623b4ae..12cb9bb2cc68 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
 	[8] = {0, IRQ_TYPE_LEVEL_HIGH},
 };
 
-#define socrates_fpga_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
 
 static void __iomem *socrates_fpga_pic_iobase;
@@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void socrates_fpga_pic_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq, irq_line;
+	unsigned int irq_line, hwirq = irqd_to_hwirq(d);
 	uint32_t mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d)
 static void socrates_fpga_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d)
 static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 static void socrates_fpga_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d)
 static void socrates_fpga_pic_eoi(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d,
 		unsigned int flow_type)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int polarity;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
 		return -EINVAL;
 
diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c
index 0beec7d5566b..94594e58594c 100644
--- a/arch/powerpc/platforms/86xx/gef_pic.c
+++ b/arch/powerpc/platforms/86xx/gef_pic.c
@@ -46,8 +46,6 @@
 #define GEF_PIC_CPU0_MCP_MASK	GEF_PIC_MCP_MASK(0)
 #define GEF_PIC_CPU1_MCP_MASK	GEF_PIC_MCP_MASK(1)
 
-#define gef_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 
 static DEFINE_RAW_SPINLOCK(gef_pic_lock);
 
@@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void gef_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask &= ~(1 << hwirq);
@@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d)
 static void gef_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask |= (1 << hwirq);
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 018cc67be426..a896511690c2 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void)
 		return;
 	}
 
-	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL);
+	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL);
 	if (ret) {
 		pr_err("%s: can't request pixis event IRQ: %d\n",
 		       __func__, ret);
@@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
-static u32 get_busfreq(void)
-{
-	struct device_node *node;
-
-	u32 fs_busfreq = 0;
-	node = of_find_node_by_type(NULL, "cpu");
-	if (node) {
-		unsigned int size;
-		const unsigned int *prop =
-			of_get_property(node, "bus-frequency", &size);
-		if (prop)
-			fs_busfreq = *prop;
-		of_node_put(node);
-	};
-	return fs_busfreq;
-}
+/*
+ * DIU Area Descriptor
+ *
+ * The MPC8610 reference manual shows the bits of the AD register in
+ * little-endian order, which causes the BLUE_C field to be split into two
+ * parts. To simplify the definition of the MAKE_AD() macro, we define the
+ * fields in big-endian order and byte-swap the result.
+ *
+ * So even though the registers don't look like they're in the
+ * same bit positions as they are on the P1022, the same value is written to
+ * the AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F		0x10000000
+#define AD_ALPHA_C_MASK		0x0E000000
+#define AD_ALPHA_C_SHIFT	25
+#define AD_BLUE_C_MASK		0x01800000
+#define AD_BLUE_C_SHIFT		23
+#define AD_GREEN_C_MASK		0x00600000
+#define AD_GREEN_C_SHIFT	21
+#define AD_RED_C_MASK		0x00180000
+#define AD_RED_C_SHIFT		19
+#define AD_PALETTE		0x00040000
+#define AD_PIXEL_S_MASK		0x00030000
+#define AD_PIXEL_S_SHIFT	16
+#define AD_COMP_3_MASK		0x0000F000
+#define AD_COMP_3_SHIFT		12
+#define AD_COMP_2_MASK		0x00000F00
+#define AD_COMP_2_SHIFT		8
+#define AD_COMP_1_MASK		0x000000F0
+#define AD_COMP_1_SHIFT		4
+#define AD_COMP_0_MASK		0x0000000F
+#define AD_COMP_0_SHIFT		0
+
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+	cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \
+	(blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \
+	(red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \
+	(c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
+	(c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
 
 unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
 						int monitor_port)
 {
 	static const unsigned long pixelformat[][3] = {
-		{0x88882317, 0x88083218, 0x65052119},
-		{0x88883316, 0x88082219, 0x65053118},
+		{
+			MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0)
+		},
+		{
+			MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0)
+		},
 	};
-	unsigned int pix_fmt, arch_monitor;
+	unsigned int arch_monitor;
 
+	/* The DVI port is mis-wired on revision 1 of this board. */
 	arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
-		/* DVI port for board version 0x01 */
-
-	if (bits_per_pixel == 32)
-		pix_fmt = pixelformat[arch_monitor][0];
-	else if (bits_per_pixel == 24)
-		pix_fmt = pixelformat[arch_monitor][1];
-	else if (bits_per_pixel == 16)
-		pix_fmt = pixelformat[arch_monitor][2];
-	else
-		pix_fmt = pixelformat[1][0];
-
-	return pix_fmt;
+
+	switch (bits_per_pixel) {
+	case 32:
+		return pixelformat[arch_monitor][0];
+	case 24:
+		return pixelformat[arch_monitor][1];
+	case 16:
+		return pixelformat[arch_monitor][2];
+	default:
+		pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel);
+		return 0;
+	}
 }
 
 void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
@@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
 	}
 
 	/* Pixel Clock configuration */
-	pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
-	speed_ccb = get_busfreq();
+	speed_ccb = fsl_get_sys_freq();
 
 	/* Calculate the pixel clock with the smallest error */
 	/* calculate the following in steps to avoid overflow */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index eacea0e3fcc8..af09baee22cb 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -56,7 +56,7 @@ smp_86xx_release_core(int nr)
 }
 
 
-static void __init
+static int __init
 smp_86xx_kick_cpu(int nr)
 {
 	unsigned int save_vector;
@@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr)
 	unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
 
 	if (nr < 0 || nr >= NR_CPUS)
-		return;
+		return -ENOENT;
 
 	pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
 
@@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr)
 	local_irq_restore(flags);
 
 	pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+	return 0;
 }
 
 
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 9ecce995dd4b..1e121088826f 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void)
 	 */
 	cpu = of_find_node_by_type(NULL, "cpu");
 	virq= irq_of_parse_and_map(cpu, 0);
-	irq = irq_map[virq].hwirq;
+	irq = virq_to_hw(virq);
 
 	sys_tmr2 = immr_map(im_sit);
 	out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b07720aa30..f970ca2b180c 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
@@ -56,16 +57,19 @@ config UDBG_RTAS_CONSOLE
 	depends on PPC_RTAS
 	default n
 
+config PPC_SMP_MUXED_IPI
+	bool
+	help
+	  Select this option if your platform supports SMP and your
+	  interrupt controller provides fewer than 4 interrupts to each
+	  cpu.  This will enable the generic code to multiplex the 4
+	  messages onto one IPI.
+
 config PPC_UDBG_BEAT
 	bool "BEAT based debug console"
 	depends on PPC_CELLEB
 	default n
 
-config XICS
-	depends on PPC_PSERIES
-	bool
-	default y
-
 config IPIC
 	bool
 	default n
@@ -147,14 +151,27 @@ config PPC_970_NAP
 	bool
 	default n
 
+config PPC_P7_NAP
+	bool
+	default n
+
 config PPC_INDIRECT_IO
 	bool
 	select GENERIC_IOMAP
-	default n
+
+config PPC_INDIRECT_PIO
+	bool
+	select PPC_INDIRECT_IO
+
+config PPC_INDIRECT_MMIO
+	bool
+	select PPC_INDIRECT_IO
+
+config PPC_IO_WORKAROUNDS
+	bool
 
 config GENERIC_IOMAP
 	bool
-	default n
 
 source "drivers/cpufreq/Kconfig"
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 111138c55f9c..2165b65876f9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@ config PPC_BOOK3S_64
 config PPC_BOOK3E_64
 	bool "Embedded processors"
 	select PPC_FPU # Make it a choice ?
+	select PPC_SMP_MUXED_IPI
 
 endchoice
 
@@ -107,6 +108,10 @@ config POWER4
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
+config PPC_A2
+	bool
+	depends on PPC_BOOK3E_64
+
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -174,6 +179,7 @@ config FSL_BOOKE
 config PPC_FSL_BOOK3E
 	bool
 	select FSL_EMB_PERFMON
+	select PPC_SMP_MUXED_IPI
 	default y if FSL_BOOKE
 
 config PTE_64BIT
@@ -226,6 +232,24 @@ config VSX
 
 	  If in doubt, say Y here.
 
+config PPC_ICSWX
+	bool "Support for PowerPC icswx coprocessor instruction"
+	depends on POWER4
+	default n
+	---help---
+
+	  This option enables kernel support for the PowerPC Initiate
+	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
+	  or newer processors.
+
+	  This option is only useful if you have a processor that supports
+	  the icswx coprocessor instruction. It does not have any effect
+	  on processors without the icswx coprocessor instruction.
+
+	  This option slightly increases kernel memory usage.
+
+	  If in doubt, say N here.
+
 config SPE
 	bool "SPE Support"
 	depends on E200 || (E500 && !PPC_E500MC)
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index fdb9f0b0d7a8..73e2116cfeed 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
+obj-$(CONFIG_PPC_WSP)		+= wsp/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 81239ebed83f..67d5009b4e86 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -6,7 +6,8 @@ config PPC_CELL_COMMON
 	bool
 	select PPC_CELL
 	select PPC_DCR_MMIO
-	select PPC_INDIRECT_IO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_NATIVE
 	select PPC_RTAS
 	select IRQ_EDGE_EOI_HANDLER
@@ -15,6 +16,7 @@ config PPC_CELL_NATIVE
 	bool
 	select PPC_CELL_COMMON
 	select MPIC
+	select PPC_IO_WORKAROUNDS
 	select IBM_NEW_EMAC_EMAC4
 	select IBM_NEW_EMAC_RGMII
 	select IBM_NEW_EMAC_ZMII #test only
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 83fafe922641..a4a89350bcfc 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_PPC_CELL_COMMON)		+= cbe_regs.o interrupt.o pervasive.o
 
 obj-$(CONFIG_PPC_CELL_NATIVE)		+= iommu.o setup.o spider-pic.o \
-					   pmu.o io-workarounds.o spider-pci.o
+					   pmu.o spider-pci.o
 obj-$(CONFIG_CBE_RAS)			+= ras.o
 
 obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
@@ -39,11 +39,10 @@ obj-y					+= celleb_setup.o \
 					   celleb_pci.o celleb_scc_epci.o \
 					   celleb_scc_pciex.o \
 					   celleb_scc_uhc.o \
-					   io-workarounds.o spider-pci.o \
-					   beat.o beat_htab.o beat_hvCall.o \
-					   beat_interrupt.o beat_iommu.o
+					   spider-pci.o beat.o beat_htab.o \
+					   beat_hvCall.o beat_interrupt.o \
+					   beat_iommu.o
 
-obj-$(CONFIG_SMP)			+= beat_smp.o
 obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
 obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
 obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index bb5ebf8fa80b..ac06903e136a 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		pr_devel("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
-		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+		if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
 			generic_handle_irq(msi);
 			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
 		} else {
@@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = {
 static int msic_host_map(struct irq_host *h, unsigned int virq,
 			 irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
 
 	return 0;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 4cb9e147c307..55015e1f6939 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -148,16 +148,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
 }
 
 /*
- * Update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- */
-static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq,
-			       irq_hw_number_t hw)
-{
-	beat_construct_and_connect_irq_plug(virq, hw);
-}
-
-/*
  * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
  * to pass away to irq_create_mapping().
  *
@@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops beatic_pic_host_ops = {
 	.map = beatic_pic_host_map,
-	.remap = beatic_pic_host_remap,
 	.unmap = beatic_pic_host_unmap,
 	.xlate = beatic_pic_host_xlate,
 	.match = beatic_pic_host_match,
@@ -257,22 +246,6 @@ void __init beatic_init_IRQ(void)
 	irq_set_default_host(beatic_host);
 }
 
-#ifdef CONFIG_SMP
-
-/* Nullified to compile with SMP mode */
-void beatic_setup_cpu(int cpu)
-{
-}
-
-void beatic_cause_IPI(int cpu, int mesg)
-{
-}
-
-void beatic_request_IPIs(void)
-{
-}
-#endif /* CONFIG_SMP */
-
 void beatic_deinit_IRQ(void)
 {
 	int	i;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f1..a7e52f91a078 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -24,9 +24,6 @@
 
 extern void beatic_init_IRQ(void);
 extern unsigned int beatic_get_irq(void);
-extern void beatic_cause_IPI(int cpu, int mesg);
-extern void beatic_request_IPIs(void);
-extern void beatic_setup_cpu(int);
 extern void beatic_deinit_IRQ(void);
 
 #endif
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
deleted file mode 100644
index 26efc204c47f..000000000000
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * SMP support for Celleb platform. (Incomplete)
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/cell/smp.c:
- * Dave Engebretsen, Peter Bergner, and
- * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- * Plus various changes from other IBM teams...
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
-
-#include <asm/irq.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_interrupt.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/*
- * The primary thread of each non-boot processor is recorded here before
- * smp init.
- */
-/* static cpumask_t of_spin_map; */
-
-/**
- * smp_startup_cpu() - start the given cpu
- *
- * At boot time, there is nothing to do for primary threads which were
- * started from Open Firmware.  For anything else, call RTAS with the
- * appropriate start location.
- *
- * Returns:
- *	0	- failure
- *	1	- success
- */
-static inline int __devinit smp_startup_cpu(unsigned int lcpu)
-{
-	return 0;
-}
-
-static void smp_beatic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		beatic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			beatic_cause_IPI(i, msg);
-		}
-	}
-}
-
-static int __init smp_beatic_probe(void)
-{
-	return cpus_weight(cpu_possible_map);
-}
-
-static void __devinit smp_beatic_setup_cpu(int cpu)
-{
-	beatic_setup_cpu(cpu);
-}
-
-static void __devinit smp_celleb_kick_cpu(int nr)
-{
-	BUG_ON(nr < 0 || nr >= NR_CPUS);
-
-	if (!smp_startup_cpu(nr))
-		return;
-}
-
-static int smp_celleb_cpu_bootable(unsigned int nr)
-{
-	return 1;
-}
-static struct smp_ops_t bpa_beatic_smp_ops = {
-	.message_pass	= smp_beatic_message_pass,
-	.probe		= smp_beatic_probe,
-	.kick_cpu	= smp_celleb_kick_cpu,
-	.setup_cpu	= smp_beatic_setup_cpu,
-	.cpu_bootable	= smp_celleb_cpu_bootable,
-};
-
-/* This is called very early */
-void __init smp_init_celleb(void)
-{
-	DBG(" -> smp_init_celleb()\n");
-
-	smp_ops = &bpa_beatic_smp_ops;
-
-	DBG(" <- smp_init_celleb()\n");
-}
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index dbc338f187a2..f3917e7a5b44 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -45,8 +45,8 @@ static struct cbe_thread_map
 	unsigned int cbe_id;
 } cbe_thread_map[NR_CPUS];
 
-static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
-static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
 
 static struct cbe_regs_map *cbe_find_map(struct device_node *np)
 {
@@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
 
 u32 cbe_node_to_cpu(int node)
 {
-	return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+	return cpumask_first(&cbe_local_mask[node]);
+
 }
 EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
 
@@ -268,9 +269,9 @@ void __init cbe_regs_init(void)
 				thread->regs = map;
 				thread->cbe_id = cbe_id;
 				map->be_node = thread->be_node;
-				cpu_set(i, cbe_local_mask[cbe_id]);
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
 				if(thread->thread_id == 0)
-					cpu_set(i, cbe_first_online_cpu);
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
 			}
 		}
 
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 404d1fc04d59..5822141aa63f 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -41,7 +41,6 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#include "io-workarounds.h"
 #include "celleb_pci.h"
 
 #define MAX_PCI_DEVICES    32
@@ -320,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = 256;
 	config = &private->fake_config[devno][fn];
-	*config = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*config = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*config == NULL) {
 		printk(KERN_ERR "PCI: "
 		       "not enough memory for fake configuration space\n");
@@ -331,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = sizeof(struct celleb_pci_resource);
 	res = &private->res[devno][fn];
-	*res = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*res = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*res == NULL) {
 		printk(KERN_ERR
 		       "PCI: not enough memory for resource data space\n");
@@ -432,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev,
 static void __init celleb_alloc_private_mem(struct pci_controller *hose)
 {
 	hose->private_data =
-		alloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+		zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
 			GFP_KERNEL);
 }
 
@@ -469,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = {
 	},
 };
 
-static int __init celleb_io_workaround_init(struct pci_controller *phb,
-					    struct celleb_phb_spec *phb_spec)
-{
-	if (phb_spec->ops) {
-		iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
-				  phb_spec->iowa_data);
-		io_workaround_init();
-	}
-
-	return 0;
-}
-
 int __init celleb_setup_phb(struct pci_controller *phb)
 {
 	struct device_node *dev = phb->dn;
@@ -500,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb)
 	if (rc)
 		return 1;
 
-	return celleb_io_workaround_init(phb, phb_spec);
+	if (phb_spec->ops)
+		iowa_register_bus(phb, phb_spec->ops,
+				  phb_spec->iowa_init,
+				  phb_spec->iowa_data);
+	return 0;
 }
 
 int celleb_pci_probe_mode(struct pci_bus *bus)
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 4cba1523ec50..a801fcc5f389 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -26,8 +26,9 @@
 #include <asm/pci-bridge.h>
 #include <asm/prom.h>
 #include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
 
-#include "io-workarounds.h"
+struct iowa_bus;
 
 struct celleb_phb_spec {
 	int (*setup)(struct device_node *, struct pci_controller *);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index e53845579770..d58d9bae4b9b 100644
--- a/arch/powerpc/platforms/cell/celleb_setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void)
 	spu_management_ops	= &spu_management_of_ops;
 #endif
 
-#ifdef CONFIG_SMP
-	smp_init_celleb();
-#endif
-
 	celleb_setup_arch_common();
 }
 
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 44cfd1bef89b..449c08c15862 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id)
 {
 	int ipi = (int)(long)dev_id;
 
-	smp_message_recv(ipi);
-
+	switch(ipi) {
+	case PPC_MSG_CALL_FUNCTION:
+		generic_smp_call_function_interrupt();
+		break;
+	case PPC_MSG_RESCHEDULE:
+		scheduler_ipi();
+		break;
+	case PPC_MSG_CALL_FUNC_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+	case PPC_MSG_DEBUGGER_BREAK:
+		debug_ipi_action(0, NULL);
+		break;
+	}
 	return IRQ_HANDLED;
 }
 static void iic_request_ipi(int ipi, const char *name)
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index d31c594cfdf3..51e290126bc1 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -42,7 +42,6 @@
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 static void qpace_show_cpuinfo(struct seq_file *m)
 {
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fd57bfe00edf..c73cf4c43fc2 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -51,11 +51,11 @@
 #include <asm/udbg.h>
 #include <asm/mpic.h>
 #include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
 
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
 
 	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
 				  (void *)SPIDER_PCI_REG_BASE);
-	io_workaround_init();
-
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f774530075b7..d176e6148e3f 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	unsigned int pcpu;
 	int start_cpu;
 
-	if (cpu_isset(lcpu, of_spin_map))
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
 		/* Already started by OF and sitting in spin loop */
 		return 1;
 
@@ -103,27 +103,11 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	return 1;
 }
 
-static void smp_iic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		iic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			iic_cause_IPI(i, msg);
-		}
-	}
-}
-
 static int __init smp_iic_probe(void)
 {
 	iic_request_IPIs();
 
-	return cpus_weight(cpu_possible_map);
+	return cpumask_weight(cpu_possible_mask);
 }
 
 static void __devinit smp_cell_setup_cpu(int cpu)
@@ -137,12 +121,12 @@ static void __devinit smp_cell_setup_cpu(int cpu)
 	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
 }
 
-static void __devinit smp_cell_kick_cpu(int nr)
+static int __devinit smp_cell_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -150,6 +134,8 @@ static void __devinit smp_cell_kick_cpu(int nr)
 	 * the processor will continue on to secondary_start
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static int smp_cell_cpu_bootable(unsigned int nr)
@@ -166,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
 	return 1;
 }
 static struct smp_ops_t bpa_iic_smp_ops = {
-	.message_pass	= smp_iic_message_pass,
+	.message_pass	= iic_cause_IPI,
 	.probe		= smp_iic_probe,
 	.kick_cpu	= smp_cell_kick_cpu,
 	.setup_cpu	= smp_cell_setup_cpu,
@@ -186,13 +172,12 @@ void __init smp_init_cell(void)
 	if (cpu_has_feature(CPU_FTR_SMT)) {
 		for_each_present_cpu(i) {
 			if (cpu_thread_in_core(i) == 0)
-				cpu_set(i, of_spin_map);
+				cpumask_set_cpu(i, &of_spin_map);
 		}
-	} else {
-		of_spin_map = cpu_present_map;
-	}
+	} else
+		cpumask_copy(&of_spin_map, cpu_present_mask);
 
-	cpu_clear(boot_cpuid, of_spin_map);
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
 
 	/* Non-lpar has additional take/give timebase */
 	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index ca7731c0b595..f1f7878893f3 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -27,8 +27,7 @@
 
 #include <asm/ppc-pci.h>
 #include <asm/pci-bridge.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
 
 #define SPIDER_PCI_DISABLE_PREFETCH
 
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index c5cf50e6b45a..442c28c00f88 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -68,9 +68,9 @@ struct spider_pic {
 };
 static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
 
-static struct spider_pic *spider_virq_to_pic(unsigned int virq)
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
 {
-	return irq_map[virq].host->host_data;
+	return irq_data_get_irq_chip_data(d);
 }
 
 static void __iomem *spider_get_irq_config(struct spider_pic *pic,
@@ -81,24 +81,24 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic,
 
 static void spider_unmask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) | 0x30000000u);
 }
 
 static void spider_mask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 }
 
 static void spider_ack_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int src = irq_map[d->irq].hwirq;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int src = irqd_to_hwirq(d);
 
 	/* Reset edge detection logic if necessary
 	 */
@@ -116,8 +116,8 @@ static void spider_ack_irq(struct irq_data *d)
 static int spider_set_irq_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int hw = irq_map[d->irq].hwirq;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
 	void __iomem *cfg = spider_get_irq_config(pic, hw);
 	u32 old_mask;
 	u32 ic;
@@ -171,6 +171,7 @@ static struct irq_chip spider_pic = {
 static int spider_host_map(struct irq_host *h, unsigned int virq,
 			irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
 
 	/* Set default irq type */
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index acfaccea5f4f..3675da73623f 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
 #include <asm/spu_csa.h>
@@ -521,18 +522,8 @@ void spu_init_channels(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_init_channels);
 
-static int spu_shutdown(struct sys_device *sysdev)
-{
-	struct spu *spu = container_of(sysdev, struct spu, sysdev);
-
-	spu_free_irqs(spu);
-	spu_destroy_spu(spu);
-	return 0;
-}
-
 static struct sysdev_class spu_sysdev_class = {
 	.name = "spu",
-	.shutdown = spu_shutdown,
 };
 
 int spu_add_sysdev_attr(struct sysdev_attribute *attr)
@@ -797,6 +788,22 @@ static inline void crash_register_spus(struct list_head *list)
 }
 #endif
 
+static void spu_shutdown(void)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		spu_free_irqs(spu);
+		spu_destroy_spu(spu);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {
+	.shutdown = spu_shutdown,
+};
+
 static int __init init_spu_base(void)
 {
 	int i, ret = 0;
@@ -830,6 +837,7 @@ static int __init init_spu_base(void)
 	crash_register_spus(&spu_full_list);
 	mutex_unlock(&spu_full_list_mutex);
 	spu_add_sysdev_attr(&attr_stat);
+	register_syscore_ops(&spu_syscore_ops);
 
 	spu_init_affinity();
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 65203857b0ce..32cb4e66d2cd 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
 	 * runqueue. The context will be rescheduled on the proper node
 	 * if it is timesliced or preempted.
 	 */
-	ctx->cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
 
 	/* Save the current cpu id for spu interrupt routing. */
 	ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index 02cafecc90e3..a800122e4dda 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -30,10 +30,12 @@
 #include <asm/mpic.h>
 #include <asm/rtas.h>
 
-static void __devinit smp_chrp_kick_cpu(int nr)
+static int __devinit smp_chrp_kick_cpu(int nr)
 {
 	*(unsigned long *)KERNELBASE = nr;
 	asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");
+
+	return 0;
 }
 
 static void __devinit smp_chrp_setup_cpu(int cpu_nr)
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 12aa62b6f227..f61a2dd96b99 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -48,7 +48,7 @@
 
 static void flipper_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d)
 
 static void flipper_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	/* this is at least needed for RSW */
@@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d)
 
 static void flipper_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d)
 
 static void flipper_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void flipper_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 {
 	return 1;
@@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops flipper_irq_host_ops = {
 	.map = flipper_pic_map,
-	.unmap = flipper_pic_unmap,
 	.match = flipper_pic_match,
 };
 
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 2bdddfc9d520..e4919170c6bc 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -43,7 +43,7 @@
 
 static void hlwd_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d)
 
 static void hlwd_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	out_be32(io_base + HW_BROADWAY_ICR, 1 << irq);
@@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d)
 
 static void hlwd_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d)
 
 static void hlwd_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static struct irq_host_ops hlwd_irq_host_ops = {
 	.map = hlwd_pic_map,
-	.unmap = hlwd_pic_unmap,
 };
 
 static unsigned int __hlwd_pic_get_irq(struct irq_host *h)
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index e5bc9f75d474..b57cda3a0817 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -1,7 +1,9 @@
 config PPC_ISERIES
 	bool "IBM Legacy iSeries"
 	depends on PPC64 && PPC_BOOK3S
-	select PPC_INDIRECT_IO
+	select PPC_SMP_MUXED_IPI
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_PCI_CHOICE if EXPERT
 
 menu "iSeries device drivers"
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 32a56c6dfa72..29c02f36b32f 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -31,6 +31,7 @@
 #include <asm/thread_info.h>
 #include <asm/ptrace.h>
 #include <asm/cputable.h>
+#include <asm/mmu.h>
 
 #include "exception.h"
 
@@ -60,29 +61,31 @@ system_reset_iSeries:
 /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
 /* In the UP case we'll yield() later, and we will not access the paca anyway */
 #ifdef CONFIG_SMP
-1:
+iSeries_secondary_wait_paca:
 	HMT_LOW
 	LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
 	ld	r23,0(r23)
-	sync
-	LOAD_REG_ADDR(r3,current_set)
-	sldi	r28,r24,3		/* get current_set[cpu#] */
-	ldx	r3,r3,r28
-	addi	r1,r3,THREAD_SIZE
-	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0			/* Keep poking the Hypervisor until */
-	bne	2f			/* we're released */
-	/* Let the Hypervisor know we are alive */
+	cmpdi	0,r23,0
+	bne	2f			/* go on when the master is ready */
+
+	/* Keep poking the Hypervisor until we're released */
 	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
 	lis	r3,0x8002
 	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
-	b	1b
-#endif
+	b	iSeries_secondary_wait_paca
 
 2:
+	HMT_MEDIUM
+	sync
+
+	LOAD_REG_ADDR(r3, nr_cpu_ids)	/* get number of pacas allocated */
+	lwz	r3,0(r3)		/* nr_cpus= or NR_CPUS can limit */
+	cmpld	0,r24,r3		/* is our cpu number allocated? */
+	bge	iSeries_secondary_yield	/* no, yield forever */
+
 	/* Load our paca now that it's been allocated */
 	LOAD_REG_ADDR(r13, paca)
 	ld	r13,0(r13)
@@ -93,10 +96,24 @@ system_reset_iSeries:
 	ori	r23,r23,MSR_RI
 	mtmsrd	r23			/* RI on */
 
-	HMT_LOW
-#ifdef CONFIG_SMP
+iSeries_secondary_smp_loop:
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
 					 * should start */
+	cmpwi	0,r23,0
+	bne	3f			/* go on when we are told */
+
+	HMT_LOW
+	/* Let the Hypervisor know we are alive */
+	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+	lis	r3,0x8002
+	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
+	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
+	sc				/* Invoke the hypervisor via a system call */
+	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
+	b	iSeries_secondary_smp_loop /* wait for signal to start */
+
+3:
+	HMT_MEDIUM
 	sync
 	LOAD_REG_ADDR(r3,current_set)
 	sldi	r28,r24,3		/* get current_set[cpu#] */
@@ -104,27 +121,22 @@ system_reset_iSeries:
 	addi	r1,r3,THREAD_SIZE
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0
-	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
 	b	__secondary_start		/* Loop until told to go */
-iSeries_secondary_smp_loop:
-	/* Let the Hypervisor know we are alive */
-	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
-	lis	r3,0x8002
-	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
+#endif /* CONFIG_SMP */
+
+iSeries_secondary_yield:
 	/* Yield the processor.  This is required for non-SMP kernels
 		which are running on multi-threaded machines. */
+	HMT_LOW
 	lis	r3,0x8000
 	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
 	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
 	li	r4,0			/* "yield timed" */
 	li	r5,-1			/* "yield forever" */
-#endif /* CONFIG_SMP */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
 	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
-	b	2b			/* If SMP not configured, secondaries
+	b	iSeries_secondary_yield	/* If SMP not configured, secondaries
 					 * loop forever */
 
 /***  ISeries-LPAR interrupt handlers ***/
@@ -157,7 +169,7 @@ BEGIN_FTR_SECTION
 FTR_SECTION_ELSE
 	EXCEPTION_PROLOG_1(PACA_EXGEN)
 	EXCEPTION_PROLOG_ISERIES_1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 	b	data_access_common
 
 .do_stab_bolted_iSeries:
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 52a6889832c7..b2103453eb01 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -42,7 +42,6 @@
 #include "irq.h"
 #include "pci.h"
 #include "call_pci.h"
-#include "smp.h"
 
 #ifdef CONFIG_PCI
 
@@ -171,7 +170,7 @@ static void iseries_enable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -188,7 +187,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	bus = REAL_IRQ_TO_BUS(rirq);
 	function = REAL_IRQ_TO_FUNC(rirq);
@@ -234,7 +233,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* irq should be locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -257,7 +256,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -271,7 +270,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 
 static void iseries_end_IRQ(struct irq_data *d)
 {
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
 		(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
@@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void)
 #ifdef CONFIG_SMP
 	if (get_lppaca()->int_dword.fields.ipi_cnt) {
 		get_lppaca()->int_dword.fields.ipi_cnt = 0;
-		iSeries_smp_message_recv();
+		smp_ipi_demux();
 	}
 #endif /* CONFIG_SMP */
 	if (hvlpevent_is_pending())
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 2946ae10fbfd..c25a0815c26b 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
 	unsigned long i;
 	unsigned long mem_blocks = 0;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
 				max_entries);
 	else
@@ -634,7 +634,7 @@ static int __init iseries_probe(void)
 
 	hpte_init_iSeries();
 	/* iSeries does not support 16M pages */
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE;
 
 	return 1;
 }
@@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void)
 	powerpc_firmware_features |= FW_FEATURE_ISERIES;
 	powerpc_firmware_features |= FW_FEATURE_LPAR;
 
+#ifdef CONFIG_SMP
+	/* iSeries can never have more than 64 cpus: cap nr_cpu_ids at 64 */
+	nr_cpu_ids = min(nr_cpu_ids, 64);
+#endif
+
 	iSeries_fixup_klimit();
 
 	/*
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 6c6029914dbc..e3265adde5d3 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -42,57 +42,23 @@
 #include <asm/cputable.h>
 #include <asm/system.h>
 
-#include "smp.h"
-
-static unsigned long iSeries_smp_message[NR_CPUS];
-
-void iSeries_smp_message_recv(void)
-{
-	int cpu = smp_processor_id();
-	int msg;
-
-	if (num_online_cpus() < 2)
-		return;
-
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &iSeries_smp_message[cpu]))
-			smp_message_recv(msg);
-}
-
-static inline void smp_iSeries_do_message(int cpu, int msg)
+static void smp_iSeries_cause_ipi(int cpu, unsigned long data)
 {
-	set_bit(msg, &iSeries_smp_message[cpu]);
 	HvCall_sendIPI(&(paca[cpu]));
 }
 
-static void smp_iSeries_message_pass(int target, int msg)
-{
-	int i;
-
-	if (target < NR_CPUS)
-		smp_iSeries_do_message(target, msg);
-	else {
-		for_each_online_cpu(i) {
-			if ((target == MSG_ALL_BUT_SELF) &&
-					(i == smp_processor_id()))
-				continue;
-			smp_iSeries_do_message(i, msg);
-		}
-	}
-}
-
 static int smp_iSeries_probe(void)
 {
 	return cpumask_weight(cpu_possible_mask);
 }
 
-static void smp_iSeries_kick_cpu(int nr)
+static int smp_iSeries_kick_cpu(int nr)
 {
 	BUG_ON((nr < 0) || (nr >= NR_CPUS));
 
 	/* Verify that our partition has a processor nr */
 	if (lppaca_of(nr).dyn_proc_status >= 2)
-		return;
+		return -ENOENT;
 
 	/* The processor is currently spinning, waiting
 	 * for the cpu_start field to become non-zero
@@ -100,6 +66,8 @@ static void smp_iSeries_kick_cpu(int nr)
 	 * continue on to secondary_start in iSeries_head.S
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static void __devinit smp_iSeries_setup_cpu(int nr)
@@ -107,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr)
 }
 
 static struct smp_ops_t iSeries_smp_ops = {
-	.message_pass = smp_iSeries_message_pass,
+	.message_pass = smp_muxed_ipi_message_pass,
+	.cause_ipi    = smp_iSeries_cause_ipi,
 	.probe        = smp_iSeries_probe,
 	.kick_cpu     = smp_iSeries_kick_cpu,
 	.setup_cpu    = smp_iSeries_setup_cpu,
diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h
deleted file mode 100644
index d501f7de01e7..000000000000
--- a/arch/powerpc/platforms/iseries/smp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PLATFORMS_ISERIES_SMP_H
-#define _PLATFORMS_ISERIES_SMP_H
-
-extern void iSeries_smp_message_recv(void);
-
-#endif	/* _PLATFORMS_ISERIES_SMP_H */
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 1e1a0873e1dd..1afd10f67858 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -18,4 +18,13 @@ config PPC_PMAC64
 	select PPC_970_NAP
 	default y
 
-
+config PPC_PMAC32_PSURGE
+	bool "Support for powersurge upgrade cards" if EXPERT
+	depends on SMP && PPC32 && PPC_PMAC
+	select PPC_SMP_MUXED_IPI
+	default y
+	help
+	  The powersurge cpu boards can be used in the generation
+	  of powermacs that have a socket for an upgradeable cpu card,
+	  including the 7500, 8500, 9500, 9600.  Support exists for
+	  both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 023f24086a0a..9089b0421191 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -21,7 +21,7 @@
 #include <linux/signal.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
 #include <linux/module.h>
@@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr)
 
 static void pmac_mask_and_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d)
 
 static void pmac_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
 static unsigned int pmac_startup_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
 
@@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d)
 static void pmac_mask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
         __clear_bit(src, ppc_cached_irq_mask);
@@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d)
 static void pmac_unmask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	__set_bit(src, ppc_cached_irq_mask);
@@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
-	__pmac_retrigger(irq_map[d->irq].hwirq);
+	__pmac_retrigger(irqd_to_hwirq(d));
 	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
 	return 1;
 }
@@ -239,15 +239,12 @@ static unsigned int pmac_pic_get_irq(void)
 	unsigned long bits = 0;
 	unsigned long flags;
 
-#ifdef CONFIG_SMP
-	void psurge_smp_message_recv(void);
-
-       	/* IPI's are a hack on the powersurge -- Cort */
-       	if ( smp_processor_id() != 0 ) {
-		psurge_smp_message_recv();
-		return NO_IRQ_IGNORE;	/* ignore, already handled */
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+	/* IPI's are a hack on the powersurge -- Cort */
+	if (smp_processor_id() != 0) {
+		return  psurge_secondary_virq;
         }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
 		int i = irq >> 5;
@@ -677,7 +674,7 @@ not_found:
 	return viaint;
 }
 
-static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
+static int pmacpic_suspend(void)
 {
 	int viaint = pmacpic_find_viaint();
 
@@ -698,7 +695,7 @@ static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
         return 0;
 }
 
-static int pmacpic_resume(struct sys_device *sysdev)
+static void pmacpic_resume(void)
 {
 	int i;
 
@@ -709,39 +706,19 @@ static int pmacpic_resume(struct sys_device *sysdev)
 	for (i = 0; i < max_real_irqs; ++i)
 		if (test_bit(i, sleep_save_mask))
 			pmac_unmask_irq(irq_get_irq_data(i));
-
-	return 0;
 }
 
-#endif /* CONFIG_PM && CONFIG_PPC32 */
-
-static struct sysdev_class pmacpic_sysclass = {
-	.name = "pmac_pic",
-};
-
-static struct sys_device device_pmacpic = {
-	.id		= 0,
-	.cls		= &pmacpic_sysclass,
-};
-
-static struct sysdev_driver driver_pmacpic = {
-#if defined(CONFIG_PM) && defined(CONFIG_PPC32)
-	.suspend	= &pmacpic_suspend,
-	.resume		= &pmacpic_resume,
-#endif /* CONFIG_PM && CONFIG_PPC32 */
+static struct syscore_ops pmacpic_syscore_ops = {
+	.suspend	= pmacpic_suspend,
+	.resume		= pmacpic_resume,
 };
 
-static int __init init_pmacpic_sysfs(void)
+static int __init init_pmacpic_syscore(void)
 {
-#ifdef CONFIG_PPC32
-	if (max_irqs == 0)
-		return -ENODEV;
-#endif
-	printk(KERN_DEBUG "Registering pmac pic with sysfs...\n");
-	sysdev_class_register(&pmacpic_sysclass);
-	sysdev_register(&device_pmacpic);
-	sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic);
+	register_syscore_ops(&pmacpic_syscore_ops);
 	return 0;
 }
-machine_subsys_initcall(powermac, init_pmacpic_sysfs);
 
+machine_subsys_initcall(powermac, init_pmacpic_syscore);
+
+#endif /* CONFIG_PM && CONFIG_PPC32 */
diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h
deleted file mode 100644
index d622a8345aaa..000000000000
--- a/arch/powerpc/platforms/powermac/pic.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __PPC_PLATFORMS_PMAC_PIC_H
-#define __PPC_PLATFORMS_PMAC_PIC_H
-
-#include <linux/irq.h>
-
-extern struct irq_chip pmac_pic;
-
-extern void pmac_pic_init(void);
-extern int pmac_get_irq(void);
-
-#endif /* __PPC_PLATFORMS_PMAC_PIC_H */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 20468f49aec0..8327cce2bdb0 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void);
 extern void pmac_check_ht_link(void);
 
 extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;
 extern void low_cpu_die(void) __attribute__((noreturn));
 
 extern int pmac_nvram_init(void);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index a830c5e80657..db092d7c4c5b 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze);
 static u64 timebase;
 static int tb_req;
 
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 
 /*
  * Powersurge (old powermac SMP) support.
@@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start;
 /* what sort of powersurge board we have */
 static int psurge_type = PSURGE_NONE;
 
+/* irq for secondary cpus to report */
+static struct irq_host *psurge_host;
+int psurge_secondary_virq;
+
 /*
  * Set and clear IPIs for powersurge.
  */
@@ -156,51 +160,52 @@ static inline void psurge_clr_ipi(int cpu)
 /*
  * On powersurge (old SMP powermac architecture) we don't have
  * separate IPIs for separate messages like openpic does.  Instead
- * we have a bitmap for each processor, where a 1 bit means that
- * the corresponding message is pending for that processor.
- * Ideally each cpu's entry would be in a different cache line.
+ * we use the generic muxed-IPI helpers (see smp_ipi_demux()).
  *  -- paulus.
  */
-static unsigned long psurge_smp_message[NR_CPUS];
-
-void psurge_smp_message_recv(void)
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
 {
-	int cpu = smp_processor_id();
-	int msg;
-
-	/* clear interrupt */
-	psurge_clr_ipi(cpu);
+	psurge_clr_ipi(smp_processor_id());
+	smp_ipi_demux();
 
-	if (num_online_cpus() < 2)
-		return;
+	return IRQ_HANDLED;
+}
 
-	/* make sure there is a message there */
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &psurge_smp_message[cpu]))
-			smp_message_recv(msg);
+static void smp_psurge_cause_ipi(int cpu, unsigned long data)
+{
+	psurge_set_ipi(cpu);
 }
 
-irqreturn_t psurge_primary_intr(int irq, void *d)
+static int psurge_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
 {
-	psurge_smp_message_recv();
-	return IRQ_HANDLED;
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+	return 0;
 }
 
-static void smp_psurge_message_pass(int target, int msg)
+static struct irq_host_ops psurge_host_ops = {
+	.map	= psurge_host_map,
+};
+
+static int psurge_secondary_ipi_init(void)
 {
-	int i;
+	int rc = -ENOMEM;
 
-	if (num_online_cpus() < 2)
-		return;
+	psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+		&psurge_host_ops, 0);
 
-	for_each_online_cpu(i) {
-		if (target == MSG_ALL
-		    || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
-		    || target == i) {
-			set_bit(msg, &psurge_smp_message[i]);
-			psurge_set_ipi(i);
-		}
-	}
+	if (psurge_host)
+		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+	if (psurge_secondary_virq)
+		rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+			IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
+
+	if (rc)
+		pr_err("Failed to setup secondary cpu IPI\n");
+
+	return rc;
 }
 
 /*
@@ -311,6 +316,9 @@ static int __init smp_psurge_probe(void)
 		ncpus = 2;
 	}
 
+	if (psurge_secondary_ipi_init())
+		return 1;
+
 	psurge_start = ioremap(PSURGE_START, 4);
 	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
 
@@ -329,7 +337,7 @@ static int __init smp_psurge_probe(void)
 	return ncpus;
 }
 
-static void __init smp_psurge_kick_cpu(int nr)
+static int __init smp_psurge_kick_cpu(int nr)
 {
 	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
 	unsigned long a, flags;
@@ -394,11 +402,13 @@ static void __init smp_psurge_kick_cpu(int nr)
 		psurge_set_ipi(1);
 
 	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
 }
 
 static struct irqaction psurge_irqaction = {
-	.handler = psurge_primary_intr,
-	.flags = IRQF_DISABLED,
+	.handler = psurge_ipi_intr,
+	.flags = IRQF_DISABLED|IRQF_PERCPU,
 	.name = "primary IPI",
 };
 
@@ -437,14 +447,15 @@ void __init smp_psurge_give_timebase(void)
 
 /* PowerSurge-style Macs */
 struct smp_ops_t psurge_smp_ops = {
-	.message_pass	= smp_psurge_message_pass,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= smp_psurge_cause_ipi,
 	.probe		= smp_psurge_probe,
 	.kick_cpu	= smp_psurge_kick_cpu,
 	.setup_cpu	= smp_psurge_setup_cpu,
 	.give_timebase	= smp_psurge_give_timebase,
 	.take_timebase	= smp_psurge_take_timebase,
 };
-#endif /* CONFIG_PPC32 - actually powersurge support */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 /*
  * Core 99 and later support
@@ -791,14 +802,14 @@ static int __init smp_core99_probe(void)
 	return ncpus;
 }
 
-static void __devinit smp_core99_kick_cpu(int nr)
+static int __devinit smp_core99_kick_cpu(int nr)
 {
 	unsigned int save_vector;
 	unsigned long target, flags;
 	unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
 
 	if (nr < 0 || nr > 3)
-		return;
+		return -ENOENT;
 
 	if (ppc_md.progress)
 		ppc_md.progress("smp_core99_kick_cpu", 0x346);
@@ -830,6 +841,8 @@ static void __devinit smp_core99_kick_cpu(int nr)
 
 	local_irq_restore(flags);
 	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+	return 0;
 }
 
 static void __devinit smp_core99_setup_cpu(int cpu_nr)
@@ -842,6 +855,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
 	mpic_setup_this_cpu();
 }
 
+#ifdef CONFIG_PPC64
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_core99_cpu_notify(struct notifier_block *self,
 				 unsigned long action, void *hcpu)
@@ -879,7 +893,6 @@ static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
 
 static void __init smp_core99_bringup_done(void)
 {
-#ifdef CONFIG_PPC64
 	extern void g5_phy_disable_cpu1(void);
 
 	/* Close i2c bus if it was used for tb sync */
@@ -894,14 +907,14 @@ static void __init smp_core99_bringup_done(void)
 		set_cpu_present(1, false);
 		g5_phy_disable_cpu1();
 	}
-#endif /* CONFIG_PPC64 */
-
 #ifdef CONFIG_HOTPLUG_CPU
 	register_cpu_notifier(&smp_core99_cpu_nb);
 #endif
+
 	if (ppc_md.progress)
 		ppc_md.progress("smp_core99_bringup_done", 0x349);
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -975,7 +988,9 @@ static void pmac_cpu_die(void)
 struct smp_ops_t core99_smp_ops = {
 	.message_pass	= smp_mpic_message_pass,
 	.probe		= smp_core99_probe,
+#ifdef CONFIG_PPC64
 	.bringup_done	= smp_core99_bringup_done,
+#endif
 	.kick_cpu	= smp_core99_kick_cpu,
 	.setup_cpu	= smp_core99_setup_cpu,
 	.give_timebase	= smp_core99_give_timebase,
@@ -1000,7 +1015,7 @@ void __init pmac_setup_smp(void)
 		of_node_put(np);
 		smp_ops = &core99_smp_ops;
 	}
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 	else {
 		/* We have to set bits in cpu_possible_mask here since the
 		 * secondary CPU(s) aren't in the device tree. Various
@@ -1013,7 +1028,7 @@ void __init pmac_setup_smp(void)
 			set_cpu_possible(cpu, true);
 		smp_ops = &psurge_smp_ops;
 	}
-#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 #ifdef CONFIG_HOTPLUG_CPU
 	ppc_md.cpu_die = pmac_cpu_die;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index f2f6413b81d3..600ed2c0ed59 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
 	result = irq_set_chip_data(*virq, pd);
 
 	if (result) {
-		pr_debug("%s:%d: set_irq_chip_data failed\n",
+		pr_debug("%s:%d: irq_set_chip_data failed\n",
 			__func__, __LINE__);
 		goto fail_set;
 	}
@@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd,
 static void dump_bmp(struct ps3_private* pd) {};
 #endif /* defined(DEBUG) */
 
-static void ps3_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	irq_set_chip_data(virq, NULL);
-}
-
 static int ps3_host_map(struct irq_host *h, unsigned int virq,
 	irq_hw_number_t hwirq)
 {
@@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops ps3_host_ops = {
 	.map = ps3_host_map,
-	.unmap = ps3_host_unmap,
 	.match = ps3_host_match,
 };
 
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 51ffde40af2b..4c44794faac0 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -39,7 +39,7 @@
 #define MSG_COUNT 4
 static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
 
-static void do_message_pass(int target, int msg)
+static void ps3_smp_message_pass(int cpu, int msg)
 {
 	int result;
 	unsigned int virq;
@@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg)
 		return;
 	}
 
-	virq = per_cpu(ps3_ipi_virqs, target)[msg];
+	virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
 	result = ps3_send_event_locally(virq);
 
 	if (result)
 		DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
-			" (%d)\n", __func__, __LINE__, target, msg, result);
-}
-
-static void ps3_smp_message_pass(int target, int msg)
-{
-	int cpu;
-
-	if (target < NR_CPUS)
-		do_message_pass(target, msg);
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(cpu)
-			if (cpu != smp_processor_id())
-				do_message_pass(cpu, msg);
-	} else {
-		for_each_online_cpu(cpu)
-			do_message_pass(cpu, msg);
-	}
+			" (%d)\n", __func__, __LINE__, cpu, msg, result);
 }
 
 static int ps3_smp_probe(void)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 39a472e9e80f..375a9f92158d 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu)
  * The current HV requires the spu shadow regs to be mapped with the
  * PTE page protection bits set as read-only (PP=3).  This implementation
  * uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_flags() to get the needed PTE bits set for the
+ * enforced by ioremap_prot() to get the needed PTE bits set for the
  * shadow regs.
  */
 
@@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu)
 		goto fail_ioremap;
 	}
 
-	spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys,
+	spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
 		LS_SIZE, _PAGE_NO_CACHE);
 
 	if (!spu->local_store) {
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5b3da4b4ea79..71af4c5d6c05 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,7 +3,10 @@ config PPC_PSERIES
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
 	select PCI_MSI
-	select XICS
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	select PPC_ICP_HV
+	select PPC_ICS_RTAS
 	select PPC_I8259
 	select PPC_RTAS
 	select PPC_RTAS_DAEMON
@@ -47,6 +50,24 @@ config SCANLOG
 	tristate "Scanlog dump interface"
 	depends on RTAS_PROC && PPC_PSERIES
 
+config IO_EVENT_IRQ
+	bool "IO Event Interrupt support"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option, if you want to enable support for IO Event
+	  interrupts. IO event interrupt is a mechanism provided by RTAS
+	  to return information about hardware error and non-error events
+	  which may need OS attention. RTAS returns events for multiple
+	  event types and scopes. Device drivers can register their handlers
+	  to receive events.
+
+	  This option will only enable the IO event platform code. You
+	  will still need to enable or compile the actual drivers
+	  that use this infrastructure to handle IO event interrupts.
+
+	  Say Y if you are unsure.
+
 config LPARCFG
 	bool "LPAR Configuration Data"
 	depends on PPC_PSERIES || PPC_ISERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fc5237810ece..3556e402cbf5 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,7 +5,6 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)	+= smp.o
-obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
 obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
@@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_PHYP_DUMP)		+= phyp_dump.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
+obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index c371bc06434b..e9190073bb97 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7;
 
 
 /*
- * Size of per-cpu log buffers. Default is just under 16 pages worth.
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
  */
-static int dtl_buf_entries = (16 * 85);
-
+static int dtl_buf_entries = N_DISPATCH_LOG;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 struct dtl_ring {
@@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl)
 
 	/* Register our dtl buffer with the hypervisor. The HV expects the
 	 * buffer size to be passed in the second word of the buffer */
-	((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry);
+	((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
 
 	hwcpu = get_hard_smp_processor_id(dtl->cpu);
 	addr = __pa(dtl->buf);
@@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl)
 	long int rc;
 	struct dtl_entry *buf = NULL;
 
+	if (!dtl_cache)
+		return -ENOMEM;
+
 	/* only allow one reader */
 	if (dtl->buf)
 		return -EBUSY;
 
 	n_entries = dtl_buf_entries;
-	buf = kmalloc_node(n_entries * sizeof(struct dtl_entry),
-			GFP_KERNEL, cpu_to_node(dtl->cpu));
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
 	if (!buf) {
 		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
 				__func__, dtl->cpu);
@@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl)
 	spin_unlock(&dtl->lock);
 
 	if (rc)
-		kfree(buf);
+		kmem_cache_free(dtl_cache, buf);
 	return rc;
 }
 
@@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl)
 {
 	spin_lock(&dtl->lock);
 	dtl_stop(dtl);
-	kfree(dtl->buf);
+	kmem_cache_free(dtl_cache, dtl->buf);
 	dtl->buf = NULL;
 	dtl->buf_entries = 0;
 	spin_unlock(&dtl->lock);
@@ -365,7 +367,7 @@ static int dtl_init(void)
 
 	event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
 				dtl_dir, &dtl_event_mask);
-	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600,
+	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
 				dtl_dir, &dtl_buf_entries);
 
 	if (!event_mask_file || !buf_entries_file) {
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 89649173d3a3..46b55cf563e3 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -93,6 +93,7 @@ static int ibm_slot_error_detail;
 static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_bridge;
+static int ibm_configure_pe;
 
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
 	pci_regs_buf[0] = 0;
 
 	rtas_pci_enable(pdn, EEH_THAW_MMIO);
+	rtas_configure_bridge(pdn);
+	eeh_restore_bars(pdn);
 	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
 
 	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
@@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+	struct device_node *dn;
+
+	for_each_child_of_node(parent, dn) {
+		if (PCI_DN(dn)) {
+
+			struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+			if (dev && dev->driver)
+				*freset |= dev->needs_freset;
+
+			__eeh_set_pe_freset(dn, freset);
+		}
+	}
+}
+
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+	struct pci_dev *dev;
+	dn = find_device_pe(dn);
+
+	/* Back up one, since config addrs might be shared */
+	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+		dn = dn->parent;
+
+	dev = PCI_DN(dn)->pcidev;
+	if (dev)
+		*freset |= dev->needs_freset;
+
+	__eeh_set_pe_freset(dn, freset);
+}
+
 /**
  * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  * @dn device node
@@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
-	if (rc)
-		printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
-		        " (%d) #RST=%d dn=%s\n",
-		        rc, state, pdn->node->full_name);
+
+	/* Fundamental-reset not supported on this PE, try hot-reset */
+	if (rc == -8 && state == 3) {
+		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			       config_addr,
+			       BUID_HI(pdn->phb->buid),
+			       BUID_LO(pdn->phb->buid), 1);
+		if (rc)
+			printk(KERN_WARNING
+				"EEH: Unable to reset the failed slot,"
+				" #RST=%d dn=%s\n",
+				rc, pdn->node->full_name);
+	}
 }
 
 /**
@@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 /**
  * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
  * @pdn: pci device node to be reset.
- *
- *  Return 0 if success, else a non-zero value.
  */
 
 static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	struct pci_dev *dev = pdn->pcidev;
+	unsigned int freset = 0;
 
-	/* Determine type of EEH reset required by device,
-	 * default hot reset or fundamental reset
-	 */
-	if (dev && dev->needs_freset)
+	/* Determine type of EEH reset required for
+	 * Partitionable Endpoint, a hot-reset (1)
+	 * or a fundamental reset (3).
+	 * A fundamental reset required by any device under
+	 * Partitionable Endpoint trumps hot-reset.
+  	 */
+	eeh_set_pe_freset(pdn->node, &freset);
+
+	if (freset)
 		rtas_pci_slot_reset(pdn, 3);
 	else
 		rtas_pci_slot_reset(pdn, 1);
@@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
 {
 	int config_addr;
 	int rc;
+	int token;
 
 	/* Use PE configuration address, if present */
 	config_addr = pdn->eeh_config_addr;
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+	/* Use new configure-pe function, if supported */
+	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+		token = ibm_configure_pe;
+	else
+		token = ibm_configure_bridge;
+
+	rc = rtas_call(token, 3, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid));
@@ -1077,6 +1132,7 @@ void __init eeh_init(void)
 	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
 	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
 	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+	ibm_configure_pe = rtas_token("ibm,configure-pe");
 
 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
 		return;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b8d70f5d9aa9..1b6cb10589e0 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-	const char *location, *pci_str, *drv_str;
+	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
 
 	frozen_dn = find_device_pe(event->dn);
 	if (!frozen_dn) {
@@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	frozen_pdn = PCI_DN(frozen_dn);
 	frozen_pdn->eeh_freeze_count++;
 
-	if (frozen_pdn->pcidev) {
-		pci_str = pci_name (frozen_pdn->pcidev);
-		drv_str = pcid_name (frozen_pdn->pcidev);
-	} else {
-		pci_str = eeh_pci_name(event->dev);
-		drv_str = pcid_name (event->dev);
-	}
+	pci_str = eeh_pci_name(event->dev);
+	drv_str = pcid_name(event->dev);
 	
 	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
@@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	printk(KERN_WARNING
 	   "EEH: This PCI device has failed %d times in the last hour:\n",
 		frozen_pdn->eeh_freeze_count);
+
+	if (frozen_pdn->pcidev) {
+		bus_pci_str = pci_name(frozen_pdn->pcidev);
+		bus_drv_str = pcid_name(frozen_pdn->pcidev);
+		printk(KERN_WARNING
+			"EEH: Bus location=%s driver=%s pci addr=%s\n",
+			location, bus_drv_str, bus_pci_str);
+	}
+
 	printk(KERN_WARNING
-		"EEH: location=%s driver=%s pci addr=%s\n",
+		"EEH: Device location=%s driver=%s pci addr=%s\n",
 		location, drv_str, pci_str);
 
 	/* Walk the various device drivers attached to this slot through
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ef8c45489e20..46f13a3c5d09 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <asm/system.h>
@@ -28,7 +29,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/pSeries_reconfig.h>
-#include "xics.h"
+#include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
 
@@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np)
 	}
 
 	for_each_cpu(cpu, tmp) {
-		BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask));
+		BUG_ON(cpu_present(cpu));
 		set_cpu_present(cpu, true);
 		set_hard_smp_processor_id(cpu, *intserv++);
 	}
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 000000000000..c829e6067d54
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * 	p = (struct pseries_io_event_sect_data *) data;
+ * 	if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * 		:
+ * 		:
+ * 	return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * 	.notifier_call = event_handler,
+ * };
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token;
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR		(('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR		(('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC	(('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH	(('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS	(('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC	(('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR	(('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR		(('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID	(('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID	(('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID		(('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW		(('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT		(('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO	(('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME		(('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF		(('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_elog_section {
+	uint16_t id;			/* 0x00 2-byte ASCII section ID	*/
+	uint16_t length;		/* 0x02 Section length in bytes	*/
+	uint8_t version;		/* 0x04 Section version		*/
+	uint8_t subtype;		/* 0x05 Section subtype		*/
+	uint16_t creator_component;	/* 0x06 Creator component ID	*/
+	uint8_t data[];			/* 0x08 Start of section data	*/
+};
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * Find a specific section in an RTAS extended event log.
+ * @elog: RTAS error/event log.
+ * @sect_id: two-byte ASCII ID of the section to look for.
+ *
+ * Return: the matching section header, NULL if not found or log is malformed.
+ */
+static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
+						       uint16_t sect_id)
+{
+	struct rtas_ext_event_log_v6 *xelog =
+		(struct rtas_ext_event_log_v6 *) elog->buffer;
+	struct pseries_elog_section *sect;
+	unsigned char *p, *log_end;
+
+	/* Check that we understand the format */
+	if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+	    xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+	    xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+		return NULL;
+
+	log_end = elog->buffer + elog->extended_log_length;
+	p = xelog->vendor_log;
+	while (p < log_end && (size_t)(log_end - p) >= sizeof(*sect)) {
+		sect = (struct pseries_elog_section *)p;
+		if (sect->id == sect_id)
+			return sect;
+		if (sect->length < sizeof(*sect))
+			break;	/* bogus length: p would never advance */
+		p += sect->length;
+	}
+	return NULL;
+}
+
+/**
+ * Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ *	pointer to the IO event section data. NULL if not found.
+ */
+static struct pseries_io_event *ioei_find_event(struct rtas_error_log *elog)
+{
+	struct pseries_elog_section *sect;
+
+	/* We should only ever get called for io-event interrupts, but if
+	 * we do get called for another type then something went wrong so
+	 * make some noise about it.
+	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+	 * No need to check event log version.
+	 */
+	if (unlikely(elog->type != RTAS_TYPE_IO)) {
+		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d\n",
+			    elog->type);
+		return NULL;
+	}
+
+	sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+	if (unlikely(!sect)) {
+		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+			    "log does not contain an IO Event section. "
+			    "Could be a bug in system firmware!\n");
+		return NULL;
+	}
+	return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clears that
+ *   error or event so it is reported once.
+ * - Each interrupt returns one event. If a platform chooses to report
+ *   multiple events through a single interrupt, it must ensure that the
+ *   interrupt remains asserted until check-exception has been used to
+ *   process all outstanding events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ *   sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ *   event type, and sub-type. There is no easy way to pre-sort clients
+ *   by scope or event type alone. For example, Torrent ISR route change
+ *   event is reported with scope 0x00 (Not Applicable) rather than
+ *   0x3B (Torrent-hub). It is better to let the clients identify
+ *   who owns the event.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+	struct pseries_io_event *event;
+	int rtas_rc;
+
+	for (;;) {
+		rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+				    RTAS_VECTOR_EXTERNAL_INTERRUPT,
+				    virq_to_hw(irq),
+				    RTAS_IO_EVENTS, 1 /* Time Critical */,
+				    __pa(ioei_rtas_buf),
+				    RTAS_DATA_BUF_SIZE);
+		if (rtas_rc != 0)
+			break;
+
+		event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+		if (!event)
+			continue;
+
+		atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+					   0, event);
+	}
+	return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+	struct device_node *np;
+
+	ioei_check_exception_token = rtas_token("check-exception");
+	if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("IO Event IRQ not supported on this system !\n");
+		return -ENODEV;
+	}
+	np = of_find_node_by_path("/event-sources/ibm,io-events");
+	if (np) {
+		request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+		of_node_put(np);
+	} else {
+		pr_err("io_event_irq: No ibm,io-events on system! "
+		       "IO Event interrupt disabled.\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d5412a18b26..01faab9456ca 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np)
 {
 	struct dynamic_dma_window_prop *dwp;
 	struct property *win64;
-	const u32 *ddr_avail;
+	const u32 *ddw_avail;
 	u64 liobn;
 	int len, ret;
 
-	ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+	ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
-	if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+	if (!win64)
 		return;
 
+	if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+		goto delprop;
+
 	dwp = win64->value;
 	liobn = (u64)be32_to_cpu(dwp->liobn);
 
@@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np)
 		pr_debug("%s successfully cleared tces in window.\n",
 			 np->full_name);
 
-	ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
 	if (ret)
 		pr_warning("%s: failed to remove direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
+			np->full_name, ret, ddw_avail[2], liobn);
 	else
 		pr_debug("%s: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
-}
+			np->full_name, ret, ddw_avail[2], liobn);
 
+delprop:
+	ret = prom_remove_property(np, win64);
+	if (ret)
+		pr_warning("%s: failed to remove direct window property: %d\n",
+			np->full_name, ret);
+}
 
-static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
 	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
 	spin_lock(&direct_window_list_lock);
 	/* check if we already created a window and dupe that config if so */
 	list_for_each_entry(window, &direct_window_list, list) {
@@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *
 	return dma_addr;
 }
 
-static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+static int find_existing_ddw_windows(void)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	int len;
+	struct device_node *pdn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
-	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
-	direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
-	if (direct64) {
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
+		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+		if (!direct64)
+			continue;
+
 		window = kzalloc(sizeof(*window), GFP_KERNEL);
-		if (!window) {
+		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+			kfree(window);
 			remove_ddw(pdn);
-		} else {
-			window->device = pdn;
-			window->prop = direct64;
-			spin_lock(&direct_window_list_lock);
-			list_add(&window->list, &direct_window_list);
-			spin_unlock(&direct_window_list_lock);
-			dma_addr = direct64->dma_base;
+			continue;
 		}
+
+		window->device = pdn;
+		window->prop = direct64;
+		spin_lock(&direct_window_list_lock);
+		list_add(&window->list, &direct_window_list);
+		spin_unlock(&direct_window_list_lock);
 	}
 
-	return dma_addr;
+	return 0;
 }
+machine_arch_initcall(pseries, find_existing_ddw_windows);
 
-static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_query_response *query)
 {
 	struct device_node *dn;
@@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 	if (pcidn->eeh_pe_config_addr)
 		cfg_addr = pcidn->eeh_pe_config_addr;
 	buid = pcidn->phb->buid;
-	ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
 		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
 	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
-		" returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+		" returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
 		BUID_LO(buid), ret);
 	return ret;
 }
 
-static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_create_response *create, int page_shift,
 			int window_shift)
 {
@@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 
 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
-		ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
 				BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
 	} while (rtas_busy_delay(ret));
 	dev_info(&dev->dev,
 		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
-		"(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+		"(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
 		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
 		 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
 
@@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	int page_shift;
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
-	const u32 *uninitialized_var(ddr_avail);
+	const u32 *uninitialized_var(ddw_avail);
 	struct direct_window *window;
-	struct property *uninitialized_var(win64);
+	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
 
 	mutex_lock(&direct_window_init_mutex);
 
-	dma_addr = dupe_ddw_if_already_created(dev, pdn);
-	if (dma_addr != 0)
-		goto out_unlock;
-
-	dma_addr = dupe_ddw_if_kexec(dev, pdn);
+	dma_addr = find_existing_ddw(pdn);
 	if (dma_addr != 0)
 		goto out_unlock;
 
@@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * for the given node in that order.
 	 * the property is actually in the parent, not the PE
 	 */
-	ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
-	if (!ddr_avail || len < 3 * sizeof(u32))
+	ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+	if (!ddw_avail || len < 3 * sizeof(u32))
 		goto out_unlock;
 
        /*
@@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * of page sizes: supported and supported for migrate-dma.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	ret = query_ddw(dev, ddr_avail, &query);
+	ret = query_ddw(dev, ddw_avail, &query);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	}
 	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
 	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+	win64->length = sizeof(*ddwprop);
 	if (!win64->name || !win64->value) {
 		dev_info(&dev->dev,
 			"couldn't allocate property name and value\n");
 		goto out_free_prop;
 	}
 
-	ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
 	if (ret != 0)
 		goto out_free_prop;
 
@@ -1021,13 +1026,16 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	const void *dma_window = NULL;
 	u64 dma_offset;
 
-	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+	if (!dev->dma_mask)
 		return -EIO;
 
+	if (!dev_is_pci(dev))
+		goto check_mask;
+
+	pdev = to_pci_dev(dev);
+
 	/* only attempt to use a new window if 64-bit DMA is requested */
 	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
-		pdev = to_pci_dev(dev);
-
 		dn = pci_device_to_OF_node(pdev);
 		dev_dbg(dev, "node is %s\n", dn->full_name);
 
@@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 		}
 	}
 
-	/* fall-through to iommu ops */
-	if (!ddw_enabled) {
-		dev_info(dev, "Using 32-bit DMA via iommu\n");
+	/* fall back on iommu ops, restore table pointer with ops */
+	if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
+		dev_info(dev, "Restoring 32-bit DMA via iommu\n");
 		set_dma_ops(dev, &dma_iommu_ops);
+		pci_dma_dev_setup_pSeriesLP(pdev);
 	}
 
+check_mask:
+	if (!dma_supported(dev, dma_mask))
+		return -EIO;
+
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 77d38a5e2ff9..54cf3a4aa16b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -7,15 +7,18 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
 #include <asm/machdep.h>
 #include <asm/page.h>
 #include <asm/firmware.h>
 #include <asm/kexec.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/smp.h>
 
 #include "pseries.h"
-#include "xics.h"
 #include "plpar_wrappers.h"
 
 static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ca5d5898d320..39e6e0a7b2fa 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Make pHyp happy */
 	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
 		hpte_r &= ~_PAGE_COHERENT;
+	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+		flags |= H_COALESCE_CAND;
 
 	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
@@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 	unsigned long i, pix, rc;
 	unsigned long flags = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long param[9];
 	unsigned long va;
 	unsigned long hash, index, shift, hidx, slot;
@@ -771,3 +773,47 @@ out:
 	local_irq_restore(flags);
 }
 #endif
+
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_MPP_X, retbuf);
+
+	mpp_x_data->coalesced_bytes = retbuf[0];
+	mpp_x_data->pool_coalesced_bytes = retbuf[1];
+	mpp_x_data->pool_purr_cycles = retbuf[2];
+	mpp_x_data->pool_spurr_cycles = retbuf[3];
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d9801117124b..4bf21207d7d3 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
 			lbuf[1]);
 }
 
-static inline long plpar_eoi(unsigned long xirr)
-{
-	return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-	return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-	return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_XIRR, retbuf, cppr);
-
-	*xirr_ret = retbuf[0];
-
-	return rc;
-}
-
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c55d7ad9c648..086d2ae4e06a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
 			   critical, __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
 			   __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 	struct rtas_error_log *h, *errhdr = NULL;
 
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
-		printk(KERN_ERR "FWNMI: corrupt r3\n");
+		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 000724149089..593acceeff96 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -53,9 +53,9 @@
 #include <asm/irq.h>
 #include <asm/time.h>
 #include <asm/nvram.h>
-#include "xics.h"
 #include <asm/pmc.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/ppc-pci.h>
 #include <asm/i8259.h>
 #include <asm/udbg.h>
@@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void)
 		mpic_assign_isu(mpic, n, isuaddr);
 	}
 
+	/* Setup top-level get_irq */
+	ppc_md.get_irq = mpic_get_irq;
+
 	/* All ISUs are setup, complete initialization */
 	mpic_init(mpic);
 
@@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void)
 
 static void __init pseries_xics_init_IRQ(void)
 {
-	xics_init_IRQ();
+	xics_init();
 	pseries_setup_i8259_cascade();
 }
 
@@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void)
 		if (strstr(typep, "open-pic")) {
 			pSeries_mpic_node = of_node_get(np);
 			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
-			ppc_md.get_irq        = mpic_get_irq;
 			setup_kexec_cpu_down_mpic();
 			smp_init_pseries_mpic();
 			return;
@@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = {
 	.notifier_call = pci_dn_reconfig_notifier,
 };
 
+struct kmem_cache *dtl_cache;
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Allocate space for the dispatch trace log for all possible cpus
@@ -291,10 +295,12 @@ static int alloc_dispatch_logs(void)
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
+	if (!dtl_cache)
+		return 0;
+
 	for_each_possible_cpu(cpu) {
 		pp = &paca[cpu];
-		dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
-				   cpu_to_node(cpu));
+		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
 		if (!dtl) {
 			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
 				cpu);
@@ -324,10 +330,27 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-
-early_initcall(alloc_dispatch_logs);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+						DISPATCH_LOG_BYTES, 0, NULL);
+	if (!dtl_cache) {
+		pr_warn("Failed to create dispatch trace log buffer cache\n");
+		pr_warn("Stolen time statistics will be unreliable\n");
+		return 0;
+	}
+
+	return alloc_dispatch_logs();
+}
+early_initcall(alloc_dispatch_log_kmem_cache);
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Discover PIC type and setup ppc_md accordingly */
@@ -395,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr)
 #define CMO_CHARACTERISTICS_TOKEN 44
 #define CMO_MAXLENGTH 1026
 
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+	else
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
 /**
  * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
  * handle that here. (Stolen from parse_system_parameter_string)
@@ -464,6 +497,7 @@ void pSeries_cmo_feature_init(void)
 		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
 		powerpc_firmware_features |= FW_FEATURE_CMO;
+		pSeries_coalesce_init();
 	} else
 		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index a509c5292a67..fbffd7e47ab8 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,10 +44,11 @@
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
+#include <asm/mpic.h>
+#include <asm/xics.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
-#include "xics.h"
 #include "offline_states.h"
 
 
@@ -136,7 +137,6 @@ out:
 	return 1;
 }
 
-#ifdef CONFIG_XICS
 static void __devinit smp_xics_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
@@ -151,14 +151,13 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	set_default_offline_state(cpu);
 #endif
 }
-#endif /* CONFIG_XICS */
 
-static void __devinit smp_pSeries_kick_cpu(int nr)
+static int __devinit smp_pSeries_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -180,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
 						"Ret= %ld\n", nr, rc);
 	}
 #endif
+
+	return 0;
 }
 
 static int smp_pSeries_cpu_bootable(unsigned int nr)
@@ -197,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
 
 	return 1;
 }
-#ifdef CONFIG_MPIC
+
 static struct smp_ops_t pSeries_mpic_smp_ops = {
 	.message_pass	= smp_mpic_message_pass,
 	.probe		= smp_mpic_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_mpic_setup_cpu,
 };
-#endif
-#ifdef CONFIG_XICS
+
 static struct smp_ops_t pSeries_xics_smp_ops = {
-	.message_pass	= smp_xics_message_pass,
-	.probe		= smp_xics_probe,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
+	.probe		= xics_smp_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_xics_setup_cpu,
 	.cpu_bootable	= smp_pSeries_cpu_bootable,
 };
-#endif
 
 /* This is called very early */
 static void __init smp_init_pseries(void)
@@ -245,14 +245,12 @@ static void __init smp_init_pseries(void)
 	pr_debug(" <- smp_init_pSeries()\n");
 }
 
-#ifdef CONFIG_MPIC
 void __init smp_init_pseries_mpic(void)
 {
 	smp_ops = &pSeries_mpic_smp_ops;
 
 	smp_init_pseries();
 }
-#endif
 
 void __init smp_init_pseries_xics(void)
 {
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
deleted file mode 100644
index d6901334d66e..000000000000
--- a/arch/powerpc/platforms/pseries/xics.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.c
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/radix-tree.h>
-#include <linux/cpu.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/percpu.h>
-
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
-
-#include "xics.h"
-#include "plpar_wrappers.h"
-
-static struct irq_host *xics_host;
-
-#define XICS_IPI		2
-#define XICS_IRQ_SPURIOUS	0
-
-/* Want a priority other than 0.  Various HW issues require this. */
-#define	DEFAULT_PRIORITY	5
-
-/*
- * Mark IPIs as higher priority so we can take them inside interrupts that
- * arent marked IRQF_DISABLED
- */
-#define IPI_PRIORITY		4
-
-/* The least favored priority */
-#define LOWEST_PRIORITY		0xFF
-
-/* The number of priorities defined above */
-#define MAX_NUM_PRIORITIES	3
-
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-struct xics_cppr {
-	unsigned char stack[MAX_NUM_PRIORITIES];
-	int index;
-};
-
-static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
-
-/* Direct hardware low level accessors */
-
-/* The part of the interrupt presentation layer that we care about */
-struct xics_ipl {
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr_poll;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr;
-	u32 dummy;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} qirr;
-};
-
-static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
-
-static inline unsigned int direct_xirr_info_get(void)
-{
-	int cpu = smp_processor_id();
-
-	return in_be32(&xics_per_cpu[cpu]->xirr.word);
-}
-
-static inline void direct_xirr_info_set(unsigned int value)
-{
-	int cpu = smp_processor_id();
-
-	out_be32(&xics_per_cpu[cpu]->xirr.word, value);
-}
-
-static inline void direct_cppr_info(u8 value)
-{
-	int cpu = smp_processor_id();
-
-	out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
-}
-
-static inline void direct_qirr_info(int n_cpu, u8 value)
-{
-	out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
-}
-
-
-/* LPAR low level accessors */
-
-static inline unsigned int lpar_xirr_info_get(unsigned char cppr)
-{
-	unsigned long lpar_rc;
-	unsigned long return_value;
-
-	lpar_rc = plpar_xirr(&return_value, cppr);
-	if (lpar_rc != H_SUCCESS)
-		panic(" bad return code xirr - rc = %lx\n", lpar_rc);
-	return (unsigned int)return_value;
-}
-
-static inline void lpar_xirr_info_set(unsigned int value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_eoi(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
-		      value);
-}
-
-static inline void lpar_cppr_info(u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_cppr(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code cppr - rc = %lx\n", lpar_rc);
-}
-
-static inline void lpar_qirr_info(int n_cpu , u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code qirr - rc = %lx\n", lpar_rc);
-}
-
-
-/* Interface to generic irq subsystem */
-
-#ifdef CONFIG_SMP
-/*
- * For the moment we only implement delivery to all cpus or one cpu.
- *
- * If the requested affinity is cpu_all_mask, we set global affinity.
- * If not we set it to the first cpu in the mask, even if multiple cpus
- * are set. This is so things like irqbalance (which set core and package
- * wide affinities) do the right thing.
- */
-static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
-			  unsigned int strict_check)
-{
-
-	if (!distribute_irqs)
-		return default_server;
-
-	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
-		int server = cpumask_first_and(cpu_online_mask, cpumask);
-
-		if (server < nr_cpu_ids)
-			return get_hard_smp_processor_id(server);
-
-		if (strict_check)
-			return -1;
-	}
-
-	/*
-	 * Workaround issue with some versions of JS20 firmware that
-	 * deliver interrupts to cpus which haven't been started. This
-	 * happens when using the maxcpus= boot option.
-	 */
-	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
-		return default_distrib_server;
-
-	return default_server;
-}
-#else
-#define get_irq_server(virq, cpumask, strict_check) (default_server)
-#endif
-
-static void xics_unmask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-	int call_status;
-	int server;
-
-	pr_devel("xics: unmask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	pr_devel(" -> map to hwirq 0x%x\n", hwirq);
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-
-	server = get_irq_server(d->irq, d->affinity, 0);
-
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
-				DEFAULT_PRIORITY);
-	if (call_status != 0) {
-		printk(KERN_ERR
-			"%s: ibm_set_xive irq %u server %x returned %d\n",
-			__func__, hwirq, server, call_status);
-		return;
-	}
-
-	/* Now unmask the interrupt (often a no-op) */
-	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static unsigned int xics_startup(struct irq_data *d)
-{
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (d->msi_desc)
-		unmask_msi_irq(d);
-
-	/* unmask it */
-	xics_unmask_irq(d);
-	return 0;
-}
-
-static void xics_mask_real_irq(unsigned int hwirq)
-{
-	int call_status;
-
-	if (hwirq == XICS_IPI)
-		return;
-
-	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-
-	/* Have to set XIVE to 0xff to be able to remove a slot */
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
-				default_server, 0xff);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static void xics_mask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-
-	pr_devel("xics: mask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-	xics_mask_real_irq(hwirq);
-}
-
-static void xics_mask_unknown_vec(unsigned int vec)
-{
-	printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
-	xics_mask_real_irq(vec);
-}
-
-static inline unsigned int xics_xirr_vector(unsigned int xirr)
-{
-	/*
-	 * The top byte is the old cppr, to be restored on EOI.
-	 * The remaining 24 bits are the vector.
-	 */
-	return xirr & 0x00ffffff;
-}
-
-static void push_cppr(unsigned int vec)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
-		return;
-
-	if (vec == XICS_IPI)
-		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
-	else
-		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
-}
-
-static unsigned int xics_get_irq_direct(void)
-{
-	unsigned int xirr = direct_xirr_info_get();
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have rtas mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	direct_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned int xics_get_irq_lpar(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]);
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have RTAS mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	lpar_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned char pop_cppr(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index < 1))
-		return LOWEST_PRIORITY;
-
-	return os_cppr->stack[--os_cppr->index];
-}
-
-static void xics_eoi_direct(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	direct_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static void xics_eoi_lpar(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static int
-xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
-{
-	unsigned int hwirq;
-	int status;
-	int xics_status[2];
-	int irq_server;
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	irq_server = get_irq_server(d->irq, cpumask, 1);
-	if (irq_server == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		printk(KERN_WARNING
-			"%s: No online cpus in the mask %s for irq %d\n",
-			__func__, cpulist, d->irq);
-		return -1;
-	}
-
-	status = rtas_call(ibm_set_xive, 3, 1, NULL,
-				hwirq, irq_server, xics_status[1]);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	return 0;
-}
-
-static struct irq_chip xics_pic_direct = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_direct,
-	.irq_set_affinity = xics_set_affinity
-};
-
-static struct irq_chip xics_pic_lpar = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_lpar,
-	.irq_set_affinity = xics_set_affinity
-};
-
-
-/* Interface to arch irq controller subsystem layer */
-
-/* Points to the irq_chip we're actually using */
-static struct irq_chip *xics_irq_chip;
-
-static int xics_host_match(struct irq_host *h, struct device_node *node)
-{
-	/* IBM machines have interrupt parents of various funky types for things
-	 * like vdevices, events, etc... The trick we use here is to match
-	 * everything here except the legacy 8259 which is compatible "chrp,iic"
-	 */
-	return !of_device_is_compatible(node, "chrp,iic");
-}
-
-static int xics_host_map(struct irq_host *h, unsigned int virq,
-			 irq_hw_number_t hw)
-{
-	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
-
-	/* Insert the interrupt mapping into the radix tree for fast lookup */
-	irq_radix_revmap_insert(xics_host, virq, hw);
-
-	irq_set_status_flags(virq, IRQ_LEVEL);
-	irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
-	return 0;
-}
-
-static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
-			   const u32 *intspec, unsigned int intsize,
-			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
-
-{
-	/* Current xics implementation translates everything
-	 * to level. It is not technically right for MSIs but this
-	 * is irrelevant at this point. We might get smarter in the future
-	 */
-	*out_hwirq = intspec[0];
-	*out_flags = IRQ_TYPE_LEVEL_LOW;
-
-	return 0;
-}
-
-static struct irq_host_ops xics_host_ops = {
-	.match = xics_host_match,
-	.map = xics_host_map,
-	.xlate = xics_host_xlate,
-};
-
-static void __init xics_init_host(void)
-{
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		xics_irq_chip = &xics_pic_lpar;
-	else
-		xics_irq_chip = &xics_pic_direct;
-
-	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
-				   XICS_IRQ_SPURIOUS);
-	BUG_ON(xics_host == NULL);
-	irq_set_default_host(xics_host);
-}
-
-
-/* Inter-processor interrupt support */
-
-#ifdef CONFIG_SMP
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-static inline void smp_xics_do_message(int cpu, int msg)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	set_bit(msg, tgt);
-	mb();
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, IPI_PRIORITY);
-	else
-		direct_qirr_info(cpu, IPI_PRIORITY);
-}
-
-void smp_xics_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		smp_xics_do_message(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			smp_xics_do_message(i, msg);
-		}
-	}
-}
-
-static irqreturn_t xics_ipi_dispatch(int cpu)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	mb();	/* order mmio clearing qirr */
-	while (*tgt) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNCTION);
-		}
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
-			smp_message_recv(PPC_MSG_RESCHEDULE);
-		}
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
-		}
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
-			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
-		}
-#endif
-	}
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	direct_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	lpar_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static void xics_request_ipi(void)
-{
-	unsigned int ipi;
-	int rc;
-
-	ipi = irq_create_mapping(xics_host, XICS_IPI);
-	BUG_ON(ipi == NO_IRQ);
-
-	/*
-	 * IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled
-	 */
-	irq_set_handler(ipi, handle_percpu_irq);
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		rc = request_irq(ipi, xics_ipi_action_lpar,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	else
-		rc = request_irq(ipi, xics_ipi_action_direct,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	BUG_ON(rc);
-}
-
-int __init smp_xics_probe(void)
-{
-	xics_request_ipi();
-
-	return cpumask_weight(cpu_possible_mask);
-}
-
-#endif /* CONFIG_SMP */
-
-
-/* Initialization */
-
-static void xics_update_irq_servers(void)
-{
-	int i, j;
-	struct device_node *np;
-	u32 ilen;
-	const u32 *ireg;
-	u32 hcpuid;
-
-	/* Find the server numbers for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
-	if (!ireg) {
-		of_node_put(np);
-		return;
-	}
-
-	i = ilen / sizeof(int);
-	hcpuid = get_hard_smp_processor_id(boot_cpuid);
-
-	/* Global interrupt distribution server is specified in the last
-	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
-	 * entry fom this property for current boot cpu id and use it as
-	 * default distribution server
-	 */
-	for (j = 0; j < i; j += 2) {
-		if (ireg[j] == hcpuid) {
-			default_server = hcpuid;
-			default_distrib_server = ireg[j+1];
-		}
-	}
-
-	of_node_put(np);
-}
-
-static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
-				     unsigned long size)
-{
-	int i;
-
-	/* This may look gross but it's good enough for now, we don't quite
-	 * have a hard -> linux processor id matching.
-	 */
-	for_each_possible_cpu(i) {
-		if (!cpu_present(i))
-			continue;
-		if (hw_id == get_hard_smp_processor_id(i)) {
-			xics_per_cpu[i] = ioremap(addr, size);
-			return;
-		}
-	}
-}
-
-static void __init xics_init_one_node(struct device_node *np,
-				      unsigned int *indx)
-{
-	unsigned int ilen;
-	const u32 *ireg;
-
-	/* This code does the theorically broken assumption that the interrupt
-	 * server numbers are the same as the hard CPU numbers.
-	 * This happens to be the case so far but we are playing with fire...
-	 * should be fixed one of these days. -BenH.
-	 */
-	ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
-
-	/* Do that ever happen ? we'll know soon enough... but even good'old
-	 * f80 does have that property ..
-	 */
-	WARN_ON(ireg == NULL);
-	if (ireg) {
-		/*
-		 * set node starting index for this node
-		 */
-		*indx = *ireg;
-	}
-	ireg = of_get_property(np, "reg", &ilen);
-	if (!ireg)
-		panic("xics_init_IRQ: can't find interrupt reg property");
-
-	while (ilen >= (4 * sizeof(u32))) {
-		unsigned long addr, size;
-
-		/* XXX Use proper OF parsing code here !!! */
-		addr = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		addr |= *ireg++;
-		ilen -= sizeof(u32);
-		size = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		size |= *ireg++;
-		ilen -= sizeof(u32);
-		xics_map_one_cpu(*indx, addr, size);
-		(*indx)++;
-	}
-}
-
-void __init xics_init_IRQ(void)
-{
-	struct device_node *np;
-	u32 indx = 0;
-	int found = 0;
-	const u32 *isize;
-
-	ppc64_boot_msg(0x20, "XICS Init");
-
-	ibm_get_xive = rtas_token("ibm,get-xive");
-	ibm_set_xive = rtas_token("ibm,set-xive");
-	ibm_int_on  = rtas_token("ibm,int-on");
-	ibm_int_off = rtas_token("ibm,int-off");
-
-	for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
-		found = 1;
-		if (firmware_has_feature(FW_FEATURE_LPAR)) {
-			of_node_put(np);
-			break;
-			}
-		xics_init_one_node(np, &indx);
-	}
-	if (found == 0)
-		return;
-
-	/* get the bit size of server numbers */
-	found = 0;
-
-	for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
-		isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-
-		if (!isize)
-			continue;
-
-		if (!found) {
-			interrupt_server_size = *isize;
-			found = 1;
-		} else if (*isize != interrupt_server_size) {
-			printk(KERN_WARNING "XICS: "
-			       "mismatched ibm,interrupt-server#-size\n");
-			interrupt_server_size = max(*isize,
-						    interrupt_server_size);
-		}
-	}
-
-	xics_update_irq_servers();
-	xics_init_host();
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		ppc_md.get_irq = xics_get_irq_lpar;
-	else
-		ppc_md.get_irq = xics_get_irq_direct;
-
-	xics_setup_cpu();
-
-	ppc64_boot_msg(0x21, "XICS Done");
-}
-
-/* Cpu startup, shutdown, and hotplug */
-
-static void xics_set_cpu_priority(unsigned char cppr)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	/*
-	 * we only really want to set the priority when there's
-	 * just one cppr value on the stack
-	 */
-	WARN_ON(os_cppr->index != 0);
-
-	os_cppr->stack[0] = cppr;
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_cppr_info(cppr);
-	else
-		direct_cppr_info(cppr);
-	iosync();
-}
-
-/* Have the calling processor join or leave the specified global queue */
-static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
-{
-	int index;
-	int status;
-
-	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
-		return;
-
-	index = (1UL << interrupt_server_size) - 1 - gserver;
-
-	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
-
-	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
-	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
-}
-
-void xics_setup_cpu(void)
-{
-	xics_set_cpu_priority(LOWEST_PRIORITY);
-
-	xics_set_cpu_giq(default_distrib_server, 1);
-}
-
-void xics_teardown_cpu(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	int cpu = smp_processor_id();
-
-	/*
-	 * we have to reset the cppr index to 0 because we're
-	 * not going to return from the IPI
-	 */
-	os_cppr->index = 0;
-	xics_set_cpu_priority(0);
-
-	/* Clear any pending IPI request */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, 0xff);
-	else
-		direct_qirr_info(cpu, 0xff);
-}
-
-void xics_kexec_teardown_cpu(int secondary)
-{
-	xics_teardown_cpu();
-
-	/*
-	 * we take the ipi irq but and never return so we
-	 * need to EOI the IPI, but want to leave our priority 0
-	 *
-	 * should we check all the other interrupts too?
-	 * should we be flagging idle loop instead?
-	 * or creating some task to be scheduled?
-	 */
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_xirr_info_set((0x00 << 24) | XICS_IPI);
-	else
-		direct_xirr_info_set((0x00 << 24) | XICS_IPI);
-
-	/*
-	 * Some machines need to have at least one cpu in the GIQ,
-	 * so leave the master cpu in the group.
-	 */
-	if (secondary)
-		xics_set_cpu_giq(default_distrib_server, 0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Interrupts are disabled. */
-void xics_migrate_irqs_away(void)
-{
-	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
-	int virq;
-
-	/* If we used to be the default server, move to the new "boot_cpuid" */
-	if (hw_cpu == default_server)
-		xics_update_irq_servers();
-
-	/* Reject any interrupt that was queued to us... */
-	xics_set_cpu_priority(0);
-
-	/* Remove ourselves from the global interrupt queue */
-	xics_set_cpu_giq(default_distrib_server, 0);
-
-	/* Allow IPIs again... */
-	xics_set_cpu_priority(DEFAULT_PRIORITY);
-
-	for_each_irq(virq) {
-		struct irq_desc *desc;
-		struct irq_chip *chip;
-		unsigned int hwirq;
-		int xics_status[2];
-		int status;
-		unsigned long flags;
-
-		/* We can't set affinity on ISA interrupts */
-		if (virq < NUM_ISA_INTERRUPTS)
-			continue;
-		if (irq_map[virq].host != xics_host)
-			continue;
-		hwirq = (unsigned int)irq_map[virq].hwirq;
-		/* We need to get IPIs still. */
-		if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-			continue;
-
-		desc = irq_to_desc(virq);
-
-		/* We only need to migrate enabled IRQS */
-		if (desc == NULL || desc->action == NULL)
-			continue;
-
-		chip = irq_desc_get_chip(desc);
-		if (chip == NULL || chip->irq_set_affinity == NULL)
-			continue;
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-
-		status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-		if (status) {
-			printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-					__func__, hwirq, status);
-			goto unlock;
-		}
-
-		/*
-		 * We only support delivery to all cpus or to one cpu.
-		 * The irq has to be migrated only in the single cpu
-		 * case.
-		 */
-		if (xics_status[0] != hw_cpu)
-			goto unlock;
-
-		/* This is expected during cpu offline. */
-		if (cpu_online(cpu))
-			printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
-			       virq, cpu);
-
-		/* Reset affinity to all cpus */
-		cpumask_setall(desc->irq_data.affinity);
-		chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
-unlock:
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	}
-}
-#endif
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
deleted file mode 100644
index d1d5a83039ae..000000000000
--- a/arch/powerpc/platforms/pseries/xics.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.h
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef _POWERPC_KERNEL_XICS_H
-#define _POWERPC_KERNEL_XICS_H
-
-extern void xics_init_IRQ(void);
-extern void xics_setup_cpu(void);
-extern void xics_teardown_cpu(void);
-extern void xics_kexec_teardown_cpu(int secondary);
-extern void xics_migrate_irqs_away(void);
-extern int smp_xics_probe(void);
-extern void smp_xics_message_pass(int target, int msg);
-
-#endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
new file mode 100644
index 000000000000..c3c48eb62cc1
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Kconfig
@@ -0,0 +1,28 @@
+config PPC_WSP
+	bool
+	default n
+
+menu "WSP platform selection"
+	depends on PPC_BOOK3E_64
+
+config PPC_PSR2
+	bool "PSR-2 platform"
+	select PPC_A2
+	select GENERIC_TBSYNC
+	select PPC_SCOM
+	select EPAPR_BOOT
+	select PPC_WSP
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	default y
+
+endmenu
+
+config PPC_A2_DD2
+	bool "Support for DD2 based A2/WSP systems"
+	depends on PPC_A2
+
+config WORKAROUND_ERRATUM_463
+	depends on PPC_A2_DD2
+	bool "Workaround erratum 463"
+	default y
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
new file mode 100644
index 000000000000..095be73d6cd4
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Makefile
@@ -0,0 +1,6 @@
+ccflags-y			+= -mno-minimal-toc
+
+obj-y				+= setup.o ics.o
+obj-$(CONFIG_PPC_PSR2)		+= psr2.o opb_pic.o
+obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
+obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
new file mode 100644
index 000000000000..e53bd9e7b125
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright 2008-2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/xics.h>
+
+#include "wsp.h"
+#include "ics.h"
+
+
+/* WSP ICS */
+
+struct wsp_ics {
+	struct ics ics;
+	struct device_node *dn;
+	void __iomem *regs;
+	spinlock_t lock;
+	unsigned long *bitmap;
+	u32 chip_id;
+	u32 lsi_base;
+	u32 lsi_count;
+	u64 hwirq_start;
+	u64 count;
+#ifdef CONFIG_SMP
+	int *hwirq_cpu_map;
+#endif
+};
+
+#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
+
+#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
+#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
+#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
+#define XIVE_UPDATE_REG(base)		((base) + 0x28)
+#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
+
+#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
+#define TBL_SELECT_XIST		(1UL << 48)
+#define TBL_SELECT_XIVT		(1UL << 49)
+
+#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
+
+#define XIST_REQUIRED		0x8
+#define XIST_REJECTED		0x4
+#define XIST_PRESENTED		0x2
+#define XIST_PENDING		0x1
+
+#define XIVE_SERVER_SHIFT	42
+#define XIVE_SERVER_MASK	0xFFFFULL
+#define XIVE_PRIORITY_MASK	0xFFULL
+#define XIVE_PRIORITY_SHIFT	32
+#define XIVE_WRITE_ENABLE	(1ULL << 63)
+
+/*
+ * The docs refer to a 6 bit field called ChipID, which consists of a
+ * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
+ * so we ignore it, and every where we use "chip id" in this code we
+ * mean the NodeID.
+ */
+#define WSP_ICS_CHIP_SHIFT		17
+
+
+static struct wsp_ics *ics_list;
+static int num_ics;
+
+/* ICS Source controller accessors */
+
+static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
+{
+	unsigned long flags;
+	u64 xive;
+
+	spin_lock_irqsave(&ics->lock, flags);
+	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
+	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
+	spin_unlock_irqrestore(&ics->lock, flags);
+
+	return xive;
+}
+
+static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
+{
+	xive &= ~XIVE_ADDR_MASK;
+	xive |= (irq & XIVE_ADDR_MASK);
+	xive |= XIVE_WRITE_ENABLE;
+
+	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
+}
+
+/* Return @xive with the server field set to @server (masked to fit) */
+static u64 xive_set_server(u64 xive, unsigned int server)
+{
+	const u64 field = XIVE_SERVER_MASK << XIVE_SERVER_SHIFT;
+	u64 value = (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
+
+	/* Drop the old server bits before merging the new ones */
+	return (xive & ~field) | value;
+}
+
+/* Return @xive with the priority field set to @priority (masked to fit) */
+static u64 xive_set_priority(u64 xive, unsigned int priority)
+{
+	const u64 field = XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT;
+	u64 value = (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
+
+	/* Drop the old priority bits before merging the new ones */
+	return (xive & ~field) | value;
+}
+
+
+#ifdef CONFIG_SMP
+/* Find logical CPUs within mask on a given chip and store result in ret */
+void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
+{
+	int cpu, chip;
+	struct device_node *cpu_dn, *dn;
+	const u32 *prop;
+
+	cpumask_clear(ret);	/* start empty; matching CPUs are added below */
+	for_each_cpu(cpu, mask) {
+		cpu_dn = of_get_cpu_node(cpu, NULL);
+		if (!cpu_dn)
+			continue;
+
+		prop = of_get_property(cpu_dn, "at-node", NULL);
+		if (!prop) {
+			of_node_put(cpu_dn);
+			continue;
+		}
+
+		dn = of_find_node_by_phandle(*prop); /* "at-node" is a phandle */
+		of_node_put(cpu_dn);
+
+		chip = wsp_get_chip_id(dn); /* NOTE(review): dn may be NULL */
+		if (chip == chip_id)
+			cpumask_set_cpu(cpu, ret);
+
+		of_node_put(dn);
+	}
+}
+
+/* Cache in ics->hwirq_cpu_map the hard id of a CPU chosen to service hwirq */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	cpumask_var_t avail, newmask;
+	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
+	int index = hwirq - ics->hwirq_start;
+	unsigned int nodeid;
+
+	BUG_ON(index < 0 || index >= ics->count);
+
+	if (!ics->hwirq_cpu_map)
+		return -ENOMEM;
+
+	if (!distribute_irqs) {
+		ics->hwirq_cpu_map[index] = xics_default_server;
+		return 0;
+	}
+
+	/* Allocate needed CPU masks */
+	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
+		goto ret;
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		goto freeavail;
+
+	/* Find PBus attached to the source of this IRQ */
+	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
+
+	/* Find CPUs that could handle this IRQ */
+	if (affinity)
+		cpumask_and(avail, cpu_online_mask, affinity);
+	else
+		cpumask_copy(avail, cpu_online_mask);
+
+	/* Narrow selection down to logical CPUs on the same chip */
+	cpus_on_chip(nodeid, avail, newmask);
+
+	/* Ensure we haven't narrowed it down to 0 */
+	if (unlikely(cpumask_empty(newmask))) {
+		if (unlikely(cpumask_empty(avail))) {
+			ret = -1;
+			goto out;
+		}
+		cpumask_copy(newmask, avail);
+	}
+
+	/* Choose a CPU out of those we narrowed it down to in round robin */
+	target = hwirq % cpumask_weight(newmask);
+	for_each_cpu(cpu, newmask) {
+		if (cpu_rover++ >= target) {
+			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
+			ret = 0;
+			goto out;
+		}
+	}
+
+	WARN_ON(1);	/* Shouldn't happen */
+
+out:
+	free_cpumask_var(newmask);
+freeavail:
+	free_cpumask_var(avail);
+ret:
+	if (ret < 0) {
+		cpu = cpumask_first(cpu_online_mask);	/* logical CPU number */
+		ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
+		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
+			   hwirq, cpu);
+	}
+	return ret;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics)
+{
+	int i;
+
+	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
+	if (!ics->hwirq_cpu_map) {
+		pr_warning("Allocate hwirq_cpu_map failed, "
+			   "IRQ balancing disabled\n");
+		return;
+	}
+
+	for (i=0; i < ics->count; i++)
+		ics->hwirq_cpu_map[i] = xics_default_server;
+}
+
+/* Look up the cached server for @hwirq; default server if no map exists */
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	int *map = ics->hwirq_cpu_map;
+	int slot = hwirq - ics->hwirq_start;
+
+	BUG_ON(slot < 0 || slot >= ics->count);
+	if (map)
+		return map[slot];
+	return xics_default_server;
+}
+#else /* !CONFIG_SMP */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	return 0;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	return xics_default_server;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics) { }
+#endif
+
+static void wsp_chip_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int server;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return;
+
+	server = get_irq_server(ics, hw_irq);
+
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, server);
+	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static unsigned int wsp_chip_startup(struct irq_data *d)
+{
+	/* unmask it */
+	wsp_chip_unmask_irq(d);
+	return 0;
+}
+
+static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
+{
+	u64 xive;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	if (WARN_ON(!ics))
+		return;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, xics_default_server);
+	xive = xive_set_priority(xive, LOWEST_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static void wsp_chip_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics = d->chip_data;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	wsp_mask_real_irq(hw_irq, ics);
+}
+
+static int wsp_chip_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask, bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int ret;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return -1;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+
+	/*
+	 * For the moment only implement delivery to all cpus or one cpu.
+	 * The server cache is indexed by hardware irq, not by d->irq (virq).
+	 */
+	ret = cache_hwirq_map(ics, hw_irq, cpumask);
+	if (ret == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
+			   __func__, cpulist, d->irq);
+		return -1;
+	} else if (ret == -ENOMEM) {
+		pr_warning("%s: Out of memory\n", __func__);
+		return -1;
+	}
+
+	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
+	wsp_ics_set_xive(ics, hw_irq, xive);
+
+	return 0;
+}
+
+static struct irq_chip wsp_irq_chip = {
+	.name = "WSP ICS",
+	.irq_startup		= wsp_chip_startup,
+	.irq_mask		= wsp_chip_mask_irq,
+	.irq_unmask		= wsp_chip_unmask_irq,
+	.irq_set_affinity	= wsp_chip_set_affinity
+};
+
+static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
+{
+	/* All ICSs in the system implement a global irq number space,
+	 * so match against them all. */
+	return of_device_is_compatible(dn, "ibm,ppc-xics");
+}
+
+/* Return 1 if @hwirq lies in [hwirq_start, hwirq_start + count), else 0 */
+static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
+{
+	u64 first = wsp_ics->hwirq_start;
+	u64 end = first + wsp_ics->count;
+
+	return !(hwirq < first || hwirq >= end);
+}
+
+static int wsp_ics_map(struct ics *ics, unsigned int virq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+	unsigned int hw_irq = virq_to_hw(virq);
+	unsigned long flags;
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
+
+	irq_set_chip_data(virq, wsp_ics);
+
+	spin_lock_irqsave(&wsp_ics->lock, flags);
+	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
+	spin_unlock_irqrestore(&wsp_ics->lock, flags);
+
+	return 0;
+}
+
+static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return;
+
+	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
+	wsp_mask_real_irq(hw_irq, wsp_ics);
+}
+
+static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	return get_irq_server(wsp_ics, hw_irq);
+}
+
+/* HW Number allocation API */
+
+static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
+{
+	struct device_node *iparent;
+	int i;
+
+	iparent = of_irq_find_parent(dn);
+	if (!iparent) {
+		pr_err("wsp_ics: Failed to find interrupt parent!\n");
+		return NULL;
+	}
+
+	for (i = 0; i < num_ics; i++) {
+		if (ics_list[i].dn == iparent)
+			break;
+	}
+	of_node_put(iparent);	/* only the pointer value was needed */
+
+	if (i >= num_ics) {
+		pr_err("wsp_ics: Unable to find parent bitmap!\n");
+		return NULL;
+	}
+	return &ics_list[i];
+}
+
+int wsp_ics_alloc_irq(struct device_node *dn, int num)
+{
+	struct wsp_ics *ics;
+	int order, offset;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (!ics)
+		return -ENODEV;
+
+	/* Fast, but overly strict if num isn't a power of two */
+	order = get_count_order(num);
+
+	spin_lock_irq(&ics->lock);
+	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
+	spin_unlock_irq(&ics->lock);
+
+	if (offset < 0)
+		return offset;
+
+	return offset + ics->hwirq_start;
+}
+
+void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
+{
+	struct wsp_ics *ics;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (WARN_ON(!ics || !wsp_ics_match_hwirq(ics, irq)))
+		return;
+	/* The bitmap is indexed from hwirq_start, as in wsp_ics_alloc_irq() */
+	spin_lock_irq(&ics->lock);
+	bitmap_release_region(ics->bitmap, irq - ics->hwirq_start, 0);
+	spin_unlock_irq(&ics->lock);
+}
+
+/* Initialisation */
+
+static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
+				      struct device_node *dn)
+{
+	int len, i, j, size;
+	u32 start, count;
+	const u32 *p;
+	/* Build the hwirq bitmap: a set bit means "not available" */
+	size = BITS_TO_LONGS(ics->count) * sizeof(long);
+	ics->bitmap = kzalloc(size, GFP_KERNEL);
+	if (!ics->bitmap) {
+		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&ics->lock);
+
+	p = of_get_property(dn, "available-ranges", &len);
+	if (!p || !len) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: No available-ranges defined for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: Invalid available-ranges for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	bitmap_fill(ics->bitmap, ics->count);
+
+	for (i = 0; i < len / sizeof(u32); i += 2) {
+		start = of_read_number(p + i, 1);
+		count = of_read_number(p + i + 1, 1);
+		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
+
+		if ((start + count) > (ics->hwirq_start + ics->count) ||
+		     start < ics->hwirq_start) {
+			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
+					start, start + count);
+			break;
+		}
+
+		for (j = 0; j < count; j++)
+			bitmap_release_region(ics->bitmap,
+				(start + j) - ics->hwirq_start, 0);
+	}
+
+	/* Ensure LSIs are not available; the bitmap is hwirq_start relative */
+	if (wsp_ics_match_hwirq(ics, ics->lsi_base))
+		bitmap_allocate_region(ics->bitmap,
+				       ics->lsi_base - ics->hwirq_start,
+				       get_count_order(ics->lsi_count));
+	return 0;
+}
+
+static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
+{
+	u32 lsi_buid, msi_buid, msi_base, msi_count;
+	void __iomem *regs;
+	const u32 *p;
+	int rc, len, i;
+	u64 caps, buid;
+	/* Probe one ICS node: read its ranges, mask all sources, register it */
+	p = of_get_property(dn, "interrupt-ranges", &len);
+	if (!p || len < (2 * sizeof(u32))) {
+		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
+			dn->full_name);
+		return -ENOENT;
+	}
+
+	if (len > (2 * sizeof(u32))) {
+		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
+		return -EINVAL;
+	}
+
+	regs = of_iomap(dn, 0);
+	if (!regs) {
+		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
+		return -ENXIO;
+	}
+
+	ics->hwirq_start = of_read_number(p, 1);
+	ics->count = of_read_number(p + 1, 1);
+	ics->regs = regs;
+
+	/* chip_id is a u32, so test the signed result before storing it */
+	rc = wsp_get_chip_id(dn);
+	ics->chip_id = WARN_ON(rc < 0) ? 0 : rc;
+
+	/* Get some informations about the critter */
+	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
+	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
+	ics->lsi_count = caps >> 56;
+	msi_count = (caps >> 44) & 0x7ff;
+
+	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
+	 * rest is mixed in the interrupt number. We store the whole
+	 * thing though
+	 */
+	lsi_buid = (buid >> 48) & 0x1ff;
+	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
+	msi_buid = (buid >> 37) & 0x7;
+	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
+
+	pr_info("wsp_ics: Found %s\n", dn->full_name);
+	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
+		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
+	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
+		ics->lsi_count, ics->lsi_base,
+		ics->lsi_base + ics->lsi_count - 1);
+	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
+		msi_count, msi_base,
+		msi_base + msi_count - 1);
+
+	/* Let's check the HW config is sane */
+	if (ics->lsi_base < ics->hwirq_start ||
+	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
+	if (msi_base < ics->hwirq_start ||
+	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
+
+	/* We don't check for overlap between LSI and MSI, which will happen
+	 * if we use the same BUID, I'm not sure yet how legit that is.
+	 */
+
+	rc = wsp_ics_bitmap_setup(ics, dn);
+	if (rc) {
+		iounmap(regs);
+		return rc;
+	}
+
+	ics->dn = of_node_get(dn);
+	alloc_irq_map(ics);
+
+	for(i = 0; i < ics->count; i++)
+		wsp_mask_real_irq(ics->hwirq_start + i, ics);
+
+	ics->ics.map = wsp_ics_map;
+	ics->ics.mask_unknown = wsp_ics_mask_unknown;
+	ics->ics.get_server = wsp_ics_get_server;
+	ics->ics.host_match = wsp_ics_host_match;
+
+	xics_register_ics(&ics->ics);
+
+	return 0;
+}
+
+static void __init wsp_ics_set_default_server(void)
+{
+	struct device_node *np;
+	u32 hwid;
+
+	/* Find the server number for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hwid = get_hard_smp_processor_id(boot_cpuid);
+
+	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
+	xics_default_server = hwid;
+
+	of_node_put(np);
+}
+
+static int __init wsp_ics_init(void)
+{
+	struct device_node *dn;
+	struct wsp_ics *ics;
+	int rc, found;
+
+	wsp_ics_set_default_server();
+	/* Size ics_list with the same match the setup loop below uses */
+	found = 0;
+	for_each_compatible_node(dn, NULL, "ibm,wsp-xics")
+		found++;
+
+	if (found == 0) {
+		pr_err("wsp_ics: No ICS's found!\n");
+		return -ENODEV;
+	}
+
+	ics_list = kcalloc(found, sizeof(*ics), GFP_KERNEL);
+	if (!ics_list) {
+		pr_err("wsp_ics: No memory for structs.\n");
+		return -ENOMEM;
+	}
+
+	num_ics = 0;
+	ics = ics_list;
+	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
+		rc = wsp_ics_setup(ics, dn);
+		if (rc == 0) {
+			ics++;
+			num_ics++;
+		}
+	}
+
+	if (found != num_ics) {
+		pr_err("wsp_ics: Failed setting up %d ICS's\n",
+			found - num_ics);
+		return -1;
+	}
+
+	return 0;
+}
+
+void __init wsp_init_irq(void)
+{
+	wsp_ics_init();
+	xics_init();
+
+	/* We need to patch our irq chip's EOI to point to the right ICP */
+	wsp_irq_chip.irq_eoi = icp_ops->eoi;
+}
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
new file mode 100644
index 000000000000..e34d53102640
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2009 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ICS_H
+#define __ICS_H
+
+#define XIVE_ADDR_MASK		0x7FFULL
+
+extern void wsp_init_irq(void);
+
+extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
+extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
+
+#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
new file mode 100644
index 000000000000..be05631a3c1c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -0,0 +1,332 @@
+/*
+ * IBM Onboard Peripheral Bus Interrupt Controller
+ *
+ * Copyright 2010 Jack Miller, IBM Corporation.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include <asm/reg_a2.h>
+#include <asm/irq.h>
+
+#define OPB_NR_IRQS 32
+
+#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
+#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
+#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
+#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
+#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
+
+static int opb_index = 0;
+
+struct opb_pic {
+	struct irq_host *host;	/* linear irq host covering the OPB sources */
+	void __iomem *regs;	/* ioremap()ed MLS register window */
+	int index;		/* instance number, taken from opb_index */
+	spinlock_t lock;	/* serialises register read-modify-write */
+};
+
+static u32 opb_in(struct opb_pic *opb, int offset)
+{
+	return in_be32(opb->regs + offset);
+}
+
+static void opb_out(struct opb_pic *opb, int offset, u32 val)
+{
+	out_be32(opb->regs + offset, val);
+}
+
+static void opb_unmask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier | bitset);
+	ier = opb_in(opb, OPB_MLSIER);
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, mask;
+
+	opb = d->chip_data;
+	mask = ~(1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & mask);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+	u32 ier, ir;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & ~bitset);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	int invert, ipr, mask, bit;
+
+	opb = d->chip_data;
+
+	/* The only information we're interested in in the type is whether it's
+	 * a high or low trigger. For high triggered interrupts, the polarity
+	 * set for it in the MLS Interrupt Polarity Register is 0, for low
+	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
+	 * Register is interrupted as asserting the interrupt. */
+
+	switch (flow) {
+		case IRQ_TYPE_NONE:
+			opb_mask_irq(d);
+			return 0;
+
+		case IRQ_TYPE_LEVEL_HIGH:
+			invert = 0;
+			break;
+
+		case IRQ_TYPE_LEVEL_LOW:
+			invert = 1;
+			break;
+
+		default:
+			return -EINVAL;
+	}
+
+	bit = (1 << (31 - irqd_to_hwirq(d)));
+	mask = ~bit;
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ipr = opb_in(opb, OPB_MLSIPR);
+	ipr = (ipr & mask) | (invert ? bit : 0);
+	opb_out(opb, OPB_MLSIPR, ipr);
+	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+
+	/* Record the type in the interrupt descriptor */
+	irqd_set_trigger_type(d, flow);
+
+	return 0;
+}
+
+static struct irq_chip opb_irq_chip = {
+	.name		= "OPB",
+	.irq_mask	= opb_mask_irq,
+	.irq_unmask	= opb_unmask_irq,
+	.irq_mask_ack	= opb_mask_ack_irq,
+	.irq_ack	= opb_ack_irq,
+	.irq_set_type	= opb_set_irq_type
+};
+
+static int opb_host_map(struct irq_host *host, unsigned int virq,
+		irq_hw_number_t hwirq)
+{
+	struct opb_pic *opb;
+
+	opb = host->host_data;
+
+	/* Most of the important stuff is handled by the generic host code, like
+	 * the lookup, so just attach some info to the virtual irq */
+
+	irq_set_chip_data(virq, opb);
+	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int opb_host_xlate(struct irq_host *host, struct device_node *dn,
+		const u32 *intspec, unsigned int intsize,
+		irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+	/* Interrupt size must == 2 */
+	BUG_ON(intsize != 2);
+	*out_hwirq = intspec[0];
+	*out_type = intspec[1];
+	return 0;
+}
+
+static struct irq_host_ops opb_host_ops = {
+	.map = opb_host_map,
+	.xlate = opb_host_xlate,
+};
+
+irqreturn_t opb_irq_handler(int irq, void *private)
+{
+	struct opb_pic *opb;
+	u32 ir, src, subvirq;
+
+	opb = (struct opb_pic *) private; /* dev_id given to request_irq() */
+
+	/* Read the OPB MLS Interrupt Register for
+	 * asserted interrupts */
+	ir = opb_in(opb, OPB_MLSIR);
+	if (!ir)
+		return IRQ_NONE;
+
+	do {
+		/* Source number 0 - 31 (source 0 is the MSB), *NOT* a bit */
+		src = 32 - ffs(ir);
+
+		/* Translate from the OPB's conception of interrupt number to
+		 * Linux's virtual IRQ */
+
+		subvirq = irq_linear_revmap(opb->host, src);
+
+		generic_handle_irq(subvirq);
+	} while ((ir = opb_in(opb, OPB_MLSIR))); /* until none are asserted */
+
+	return IRQ_HANDLED;
+}
+
+struct opb_pic *opb_pic_init_one(struct device_node *dn)
+{
+	struct opb_pic *opb;
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		printk(KERN_ERR "opb: Couldn't translate resource\n");
+		return  NULL;
+	}
+
+	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
+	if (!opb) {
+		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
+		return NULL;
+	}
+
+	/* Get access to the OPB MMIO registers */
+	opb->regs = ioremap(res.start + 0x10000, 0x1000);
+	if (!opb->regs) {
+		printk(KERN_ERR "opb: Failed to allocate register space!\n");
+		goto free_opb;
+	}
+
+	/* Allocate an irq host so that Linux knows that despite only
+	 * having one interrupt to issue, we're the controller for multiple
+	 * hardware IRQs, so later we can lookup their virtual IRQs. */
+
+	opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR,
+			OPB_NR_IRQS, &opb_host_ops, -1);
+
+	if (!opb->host) {
+		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
+		goto free_regs;
+	}
+
+	opb->index = opb_index++;
+	spin_lock_init(&opb->lock);
+	opb->host->host_data = opb;
+
+	/* Disable all interrupts by default */
+	opb_out(opb, OPB_MLSASIER, 0);
+	opb_out(opb, OPB_MLSIER, 0);
+
+	/* ACK any interrupts left by FW */
+	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
+
+	return opb;
+
+free_regs:
+	iounmap(opb->regs);
+free_opb:
+	kfree(opb);
+	return NULL;
+}
+
+void __init opb_pic_init(void)
+{
+	struct device_node *dn;
+	struct opb_pic *opb;
+	int virq;
+	int rc;
+
+	/* Set up each "ibm,opb" node and hook its cascade interrupt */
+	for_each_compatible_node(dn, NULL, "ibm,opb") {
+
+		/* Fill in an OPB struct */
+		opb = opb_pic_init_one(dn);
+		if (!opb) {
+			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
+			continue;
+		}
+
+		/* Map / get opb's hardware virtual irq */
+		virq = irq_of_parse_and_map(dn, 0);
+		if (virq <= 0) {
+			printk(KERN_ERR "opb: irq_of_parse_and_map failed!\n");
+			continue;
+		}
+
+		/* Attach opb interrupt handler to new virtual IRQ */
+		rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb);
+		if (rc) {
+			printk(KERN_ERR "opb: request_irq failed: %d\n", rc);
+			continue;
+		}
+
+		printk(KERN_INFO "OPB%d init with %d IRQs at %p\n", opb->index,
+				OPB_NR_IRQS, opb->regs);
+	}
+}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
new file mode 100644
index 000000000000..40f28916ff6c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2008-2011, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+
+static void psr2_spin(void)
+{
+	hard_irq_disable();
+	for (;;) ;
+}
+
+static void psr2_restart(char *cmd)
+{
+	psr2_spin();
+}
+
+static int psr2_probe_devices(void)
+{
+	struct device_node *np;
+
+	/* Our RTC is a ds1500. It seems to be programmatically compatible
+	 * with the ds1511 for which we have a driver so let's use that */
+	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
+	if (np != NULL) {
+		struct resource res;
+		if (of_address_to_resource(np, 0, &res) == 0)
+			platform_device_register_simple("ds1511", 0, &res, 1);
+		of_node_put(np);	/* drop the reference the lookup took */
+	}
+	return 0;
+}
+machine_arch_initcall(psr2_md, psr2_probe_devices);
+
+static void __init psr2_setup_arch(void)
+{
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	scom_init_wsp();
+
+	/* Setup SMP callback */
+#ifdef CONFIG_SMP
+	a2_setup_smp();
+#endif
+}
+
+/* Machine probe hook: return 1 when the flat device tree root is
+ * compatible with "ibm,psr2", 0 otherwise. */
+static int __init psr2_probe(void)
+{
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	return of_flat_dt_is_compatible(dt_root, "ibm,psr2") ? 1 : 0;
+}
+
+
+static void __init psr2_init_irq(void)
+{
+	wsp_init_irq();
+	opb_pic_init();
+}
+
+define_machine(psr2_md) {
+	.name			= "PSR2 A2",
+	.probe			= psr2_probe,
+	.setup_arch		= psr2_setup_arch,
+	.restart		= psr2_restart,
+	.power_off		= psr2_spin,
+	.halt			= psr2_spin,
+	.calibrate_decr		= generic_calibrate_decr,
+	.init_IRQ		= psr2_init_irq,
+	.progress		= udbg_progress,
+	.power_save		= book3e_idle,
+};
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
new file mode 100644
index 000000000000..141e78032097
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -0,0 +1,427 @@
+/*
+ * SCOM support for A2 platforms
+ *
+ * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
+ *		       Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+#define SCOM_RAMC		0x2a		/* Ram Command */
+#define SCOM_RAMC_TGT1_EXT	0x80000000
+#define SCOM_RAMC_SRC1_EXT	0x40000000
+#define SCOM_RAMC_SRC2_EXT	0x20000000
+#define SCOM_RAMC_SRC3_EXT	0x10000000
+#define SCOM_RAMC_ENABLE	0x00080000
+#define SCOM_RAMC_THREADSEL	0x00060000
+#define SCOM_RAMC_EXECUTE	0x00010000
+#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
+#define SCOM_RAMC_MSR_PR	0x00004000
+#define SCOM_RAMC_MSR_GS	0x00002000
+#define SCOM_RAMC_FORCE		0x00001000
+#define SCOM_RAMC_FLUSH		0x00000800
+#define SCOM_RAMC_INTERRUPT	0x00000004
+#define SCOM_RAMC_ERROR		0x00000002
+#define SCOM_RAMC_DONE		0x00000001
+#define SCOM_RAMI		0x29		/* Ram Instruction */
+#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
+#define SCOM_RAMIC_INSN		0xffffffff00000000
+#define SCOM_RAMD		0x2d		/* Ram Data */
+#define SCOM_RAMDH		0x2e		/* Ram Data High */
+#define SCOM_RAMDL		0x2f		/* Ram Data Low */
+#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
+#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
+#define SCOM_PCCR0_ENABLE_RAM	0x40000000
+#define SCOM_THRCTL		0x30		/* Thread Control and Status */
+#define SCOM_THRCTL_T0_STOP	0x80000000
+#define SCOM_THRCTL_T1_STOP	0x40000000
+#define SCOM_THRCTL_T2_STOP	0x20000000
+#define SCOM_THRCTL_T3_STOP	0x10000000
+#define SCOM_THRCTL_T0_STEP	0x08000000
+#define SCOM_THRCTL_T1_STEP	0x04000000
+#define SCOM_THRCTL_T2_STEP	0x02000000
+#define SCOM_THRCTL_T3_STEP	0x01000000
+#define SCOM_THRCTL_T0_RUN	0x00800000
+#define SCOM_THRCTL_T1_RUN	0x00400000
+#define SCOM_THRCTL_T2_RUN	0x00200000
+#define SCOM_THRCTL_T3_RUN	0x00100000
+#define SCOM_THRCTL_T0_PM	0x00080000
+#define SCOM_THRCTL_T1_PM	0x00040000
+#define SCOM_THRCTL_T2_PM	0x00020000
+#define SCOM_THRCTL_T3_PM	0x00010000
+#define SCOM_THRCTL_T0_UDE	0x00008000
+#define SCOM_THRCTL_T1_UDE	0x00004000
+#define SCOM_THRCTL_T2_UDE	0x00002000
+#define SCOM_THRCTL_T3_UDE	0x00001000
+#define SCOM_THRCTL_ASYNC_DIS	0x00000800
+#define SCOM_THRCTL_TB_DIS	0x00000400
+#define SCOM_THRCTL_DEC_DIS	0x00000200
+#define SCOM_THRCTL_AND		0x31		/* THRCTL access used below to clear bits */
+#define SCOM_THRCTL_OR		0x32		/* THRCTL access used below to set bits */
+
+
+static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
+
+/* Return the SCOM mapping of cpu's core, creating and caching it on first use */
+static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
+{
+	int first = cpu_first_thread_sibling(cpu);
+	int last = cpu_last_thread_sibling(cpu);
+	scom_map_t map = per_cpu(scom_ptrs, cpu);
+	int sibling;
+
+	/* Already mapped for this core: nothing to set up */
+	if (scom_map_ok(map)) {
+		*first_thread = 0;
+		return map;
+	}
+
+	/* Map it once and share the result with every thread of the core */
+	map = scom_map_device(np, 0);
+	for (sibling = first; sibling <= last; sibling++)
+		per_cpu(scom_ptrs, sibling) = map;
+
+	/* Hack: for the boot core, this will actually get called on
+	 * the second thread up, not the first, so the test above would
+	 * report first_thread incorrectly; force it to 0 for that core. */
+	*first_thread = (first == 0) ? 0 : 1;
+
+	return map;
+}
+
+static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
+{
+	u64 ramic, status, finished;
+	int polls = 0;
+
+	/* Instruction in the upper word, command/select bits in the lower */
+	ramic = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
+		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
+	finished = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
+	scom_write(scom, SCOM_RAMIC, ramic);
+
+	for (;;) {
+		status = scom_read(scom, SCOM_RAMC);
+		if (status & finished)
+			break;
+		pr_devel("Waiting on RAMC = 0x%llx\n", status);
+		if (++polls < 3)
+			continue;
+		pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -1;
+	}
+	if (status & SCOM_RAMC_INTERRUPT) {
+		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_INTERRUPT;
+	}
+	if (status & SCOM_RAMC_ERROR) {
+		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_ERROR;
+	}
+	return 0;
+}
+
+static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
+			  u64 *out_gpr)
+{
+	/* RAM an "or rN, rN, rN" on the requested register */
+	u32 or_self = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
+	int ext = alt ? 0xf : 0x0;
+	int err = a2_scom_ram(scom, thread, or_self, ext);
+
+	if (err)
+		return err;
+
+	/* On success the value is fetched from the RAMD register */
+	*out_gpr = scom_read(scom, SCOM_RAMD);
+	return 0;
+}
+
+static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
+{
+	u32 insn;
+	int err;
+
+	if (spr == 0x0ff0)	/* magic SPR number that selects the MSR */
+		insn = 0x7c2000a6; /* mfmsr r1 */
+	else	/* mfspr r1,spr */
+		insn = 0x7c2002a6 | ((spr & 0x1f) << 16) | ((spr >> 5) << 11);
+
+	/* Move the value into r1, then read r1 back with a2_scom_getgpr() */
+	err = a2_scom_ram(scom, thread, insn, 0xf);
+	if (err)
+		return err;
+
+	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
+}
+
+static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
+			  int alt, u64 val)
+{
+	u32 lis = 0x3c000000 | (gpr << 21);
+	u32 li = 0x38000000 | (gpr << 21);	/* NOTE(review): li/lis sign-extend -- confirm */
+	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
+	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
+	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
+	u32 highest = val >> 48;		/* val bits 63..48 */
+	u32 higher = (val >> 32) & 0xffff;	/* val bits 47..32 */
+	u32 high = (val >> 16) & 0xffff;	/* val bits 31..16 */
+	u32 low = val & 0xffff;			/* val bits 15..0 */
+	int lext = alt ? 0x8 : 0x0;	/* ext mask passed with the li/lis forms */
+	int oext = alt ? 0xf : 0x0;	/* ext mask passed with ori/oris/rldicr */
+	int rc = 0;			/* OR of every a2_scom_ram() result */
+
+	if (highest)
+		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
+
+	if (higher) {
+		if (highest)
+			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, li | higher, lext);
+	}
+
+	if (highest || higher)	/* presumably shifts the upper half into place */
+		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
+
+	if (high) {
+		if (highest || higher)
+			rc |= a2_scom_ram(scom, thread, oris | high, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, lis | high, lext);
+	}
+
+	if (highest || higher || high)
+		rc |= a2_scom_ram(scom, thread, ori | low, oext);
+	else	/* nothing loaded yet: the low chunk is the whole value */
+		rc |= a2_scom_ram(scom, thread, li | low, lext);
+
+	return rc;
+}
+
+static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
+{
+	u32 insn;
+
+	/* Stage the value in r1 first; any failure there is reported as -1 */
+	if (a2_scom_setgpr(scom, thread, 1, 1, val) != 0)
+		return -1;
+
+	if (spr == 0x0ff0)
+		insn = 0x7c200124; /* mtmsr r1 */
+	else	/* mtspr spr, r1 */
+		insn = 0x7c2003a6 | ((spr & 0x1f) << 16) | ((spr >> 5) << 11);
+
+	return a2_scom_ram(scom, thread, insn, 0xf);
+}
+
+static int a2_scom_initial_tlb(scom_map_t scom, int thread)
+{
+	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
+	extern u32 a2_tlbinit_after_iprot_flush[];
+	extern u32 a2_tlbinit_after_linear_map[];
+	u32 assoc, entries, i;
+	u64 epn, tlbcfg;
+	u32 *p;
+	int rc;
+
+	/* Invalidate all entries (including iprot); the entry count and
+	 * associativity that drive the loop below come from TLB0CFG. */
+	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
+	if (rc)
+		goto scom_fail;
+	entries = tlbcfg & TLBnCFG_N_ENTRY;
+	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
+	epn = 0;
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
+
+	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
+	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
+	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+	for (i = 0; i < entries; i++) {
+		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
+
+		/* tlbwe */
+		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
+		if (rc)
+			goto scom_fail;
+
+		/* Next entry is new address? */
+		if ((i + 1) % assoc == 0) {
+			epn += (1 << 30);
+			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+		}
+	}
+
+	/* Setup args for linear mapping */
+	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
+	if (rc)
+		goto scom_fail;
+
+	/* Linear mapping */
+	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+	/*
+	 * For the boot thread, between the linear mapping and the debug
+	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
+	 * branches, but the secondary threads don't need to be nearly as smart
+	 * (i.e. we don't need to worry about invalidating the mapping we're
+	 * standing on).
+	 */
+
+	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
+	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+scom_fail:
+	if (rc)
+		pr_err("Setting up initial TLB failed, err %d\n", rc);
+
+	if (rc == -SCOM_RAMC_INTERRUPT) {
+		/* Interrupt, dump some status. Values start at 0 so that a
+		 * failed read prints 0, not stack garbage, next to its err. */
+		int err[10];
+		u64 iar = 0, srr0 = 0, srr1 = 0, esr = 0, mas0 = 0, mas1 = 0;
+		u64 mas2 = 0, mas7_3 = 0, mas8 = 0, ccr2 = 0;
+		err[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
+		err[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
+		err[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
+		err[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
+		err[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
+		err[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
+		err[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
+		err[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
+		err[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
+		err[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
+		pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, err[0]);
+		pr_err("    retreived SRR0=0x%llx (err %d)\n", srr0, err[1]);
+		pr_err("    retreived SRR1=0x%llx (err %d)\n", srr1, err[2]);
+		pr_err("    retreived ESR =0x%llx (err %d)\n", esr, err[3]);
+		pr_err("    retreived MAS0=0x%llx (err %d)\n", mas0, err[4]);
+		pr_err("    retreived MAS1=0x%llx (err %d)\n", mas1, err[5]);
+		pr_err("    retreived MAS2=0x%llx (err %d)\n", mas2, err[6]);
+		pr_err("    retreived MS73=0x%llx (err %d)\n", mas7_3, err[7]);
+		pr_err("    retreived MAS8=0x%llx (err %d)\n", mas8, err[8]);
+		pr_err("    retreived CCR2=0x%llx (err %d)\n", ccr2, err[9]);
+	}
+	return rc;
+}
+
+int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+				  struct device_node *np)
+{
+	u64 init_iar = 0, init_msr = 0, init_ccr2 = 0;
+	unsigned long start_here;
+	int rc, core_setup;
+	scom_map_t scom;
+	u64 pccr0;
+
+	scom = get_scom(lcpu, np, &core_setup);
+	if (!scom_map_ok(scom)) {
+		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
+		return -1;
+	}
+
+	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
+
+	pccr0 = scom_read(scom, SCOM_PCCR0);
+	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
+				     SCOM_PCCR0_ENABLE_RAM);
+
+	/* Stop the thread with THRCTL. If we are setting up the TLB we stop all
+	 * threads. We also disable asynchronous interrupts while RAMing.
+	 */
+	if (core_setup)
+		scom_write(scom, SCOM_THRCTL_OR,
+			      SCOM_THRCTL_T0_STOP |
+			      SCOM_THRCTL_T1_STOP |
+			      SCOM_THRCTL_T2_STOP |
+			      SCOM_THRCTL_T3_STOP |
+			      SCOM_THRCTL_ASYNC_DIS);
+	else
+		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
+
+	/* Flush its pipeline just in case */
+	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
+		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
+
+	/* Best effort: kept only for the failure dump; they stay 0 if unread */
+	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
+	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
+	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
+
+	/* Set MSR to MSR_CM (0x0ff0 is the magic SPR number selecting the MSR) */
+	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
+	if (rc) {
+		pr_err("Failed to set MSR ! err %d\n", rc);
+		return rc;
+	}
+
+	/* RAM in an sync/isync for the sake of it */
+	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
+	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
+
+	if (core_setup) {
+		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
+			 lcpu);
+		rc = a2_scom_initial_tlb(scom, thr_idx);
+		if (rc)
+			goto fail;
+	}
+
+	start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
+					: generic_secondary_thread_init);
+	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
+	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
+			     get_hard_smp_processor_id(lcpu));
+	/*
+	 * Tell book3e_secondary_core_init not to set up the TLB, we've
+	 * already done that.
+	 */
+	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
+
+	scom_write(scom, SCOM_RAMC, 0);
+	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
+	scom_write(scom, SCOM_PCCR0, pccr0);
+fail:
+	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
+	if (rc) {
+		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
+		       init_iar, init_msr, init_ccr2);
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
new file mode 100644
index 000000000000..4052e2259f30
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -0,0 +1,77 @@
+/*
+ *  SCOM backend for WSP
+ *
+ *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+
+static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+	struct resource r;
+	u64 xscom_addr;
+
+	if (!of_get_property(dev, "scom-controller", NULL)) {
+		pr_err("%s: device %s is not a SCOM controller\n",
+			__func__, dev->full_name);
+		return SCOM_MAP_INVALID;
+	}
+
+	if (of_address_to_resource(dev, 0, &r)) {
+		pr_debug("Failed to find SCOM controller address\n");
+		return SCOM_MAP_INVALID;	/* same failure value as above */
+	}
+
+	/* Transform the SCOM address into an XSCOM offset */
+	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
+
+	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
+}
+
+static void wsp_scom_unmap(scom_map_t map)
+{
+	iounmap((void __iomem *)map);	/* keep __iomem, as the read/write casts do */
+}
+
+/* Read the 64-bit register at index reg, counted from the mapping base. */
+static u64 wsp_scom_read(scom_map_t map, u32 reg)
+{
+	u64 __iomem *base = (u64 __iomem *)map;
+	return in_be64(base + reg);
+}
+
+static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	u64 __iomem *addr = (u64 __iomem *)map;
+
+	out_be64(addr + reg, value);	/* 64-bit store at register index reg */
+}
+
+static const struct scom_controller wsp_scom_controller = {
+	.read	= wsp_scom_read,
+	.write	= wsp_scom_write,
+	.map	= wsp_scom_map,
+	.unmap	= wsp_scom_unmap,
+};
+
+void scom_init_wsp(void)
+{
+	scom_init(&wsp_scom_controller);	/* hand the WSP callbacks to scom_init() */
+}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
new file mode 100644
index 000000000000..11ac2f05e01c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/setup.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+
+#include "wsp.h"
+
+/*
+ * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
+ * Won't work for nodes that are not a descendant of a wsp node.
+ */
+int wsp_get_chip_id(struct device_node *dn)
+{
+	const u32 *prop = NULL;
+	int chip_id;
+
+	/* Start looking at the specified node, not its parent */
+	for (dn = of_node_get(dn); dn; dn = of_get_next_parent(dn)) {
+		prop = of_get_property(dn, "ibm,wsp-chip-id", NULL);
+		if (prop)
+			break;
+	}
+	if (!dn)
+		return -1;
+
+	chip_id = *prop;
+	of_node_put(dn);
+	return chip_id;
+}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
new file mode 100644
index 000000000000..9d20fa9d3710
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/smp.c
@@ -0,0 +1,88 @@
+/*
+ *  SMP Support for A2 platforms
+ *
+ *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/dbell.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+static void __devinit smp_a2_setup_cpu(int cpu)
+{
+	doorbell_setup_this_cpu();	/* a2_smp_ops raises IPIs via doorbell_cause_ipi */
+
+	if (cpu != boot_cpuid)	/* XICS setup is skipped here for the boot CPU */
+		xics_setup_cpu();
+}
+
+int __devinit smp_a2_kick_cpu(int nr)
+{
+	const char *enable_method;
+	struct device_node *np;
+	int thr_idx, rc = 0;
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return -ENOENT;
+
+	np = of_get_cpu_node(nr, &thr_idx);
+	if (!np)
+		return -ENODEV;
+
+	enable_method = of_get_property(np, "enable-method", NULL);
+	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
+
+	if (!enable_method) {
+		printk(KERN_ERR "CPU%d has no enable-method\n", nr);
+		rc = -ENOENT;
+	} else if (strcmp(enable_method, "ibm,a2-scom") != 0) {
+		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
+		       nr, enable_method);
+		rc = -EINVAL;
+	} else if (a2_scom_startup_cpu(nr, thr_idx, np)) {
+		rc = -1;
+	}
+
+	/* Drop the reference of_get_cpu_node() took, on every path */
+	of_node_put(np);
+	if (rc)
+		return rc;
+
+	/* The processor is currently spinning, waiting for the cpu_start
+	 * field to become non-zero; once set it continues to secondary_start */
+	paca[nr].cpu_start = 1;
+	return 0;
+}
+
+static int __init smp_a2_probe(void)
+{
+	return num_possible_cpus();	/* weight of the possible-CPU mask */
+}
+
+static struct smp_ops_t a2_smp_ops = {
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= doorbell_cause_ipi,
+	.probe		= smp_a2_probe,		/* returns the possible-CPU count */
+	.kick_cpu	= smp_a2_kick_cpu,	/* starts a CPU per its "enable-method" */
+	.setup_cpu	= smp_a2_setup_cpu,
+};
+
+void __init a2_setup_smp(void)
+{
+	smp_ops = &a2_smp_ops;	/* called from psr2_setup_arch() under CONFIG_SMP */
+}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
new file mode 100644
index 000000000000..7c3e087fd2f2
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/wsp.h
@@ -0,0 +1,17 @@
+#ifndef __WSP_H
+#define __WSP_H
+
+#include <asm/wsp.h>
+
+extern void wsp_setup_pci(void);
+extern void scom_init_wsp(void);
+
+extern void a2_setup_smp(void);
+extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+			       struct device_node *np);
+int smp_a2_cpu_bootable(unsigned int nr);
+int __devinit smp_a2_kick_cpu(int nr);
+
+void opb_pic_init(void);
+
+#endif /*  __WSP_H */
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 396582835cb5..d775fd148d13 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -12,3 +12,13 @@ config PPC_MSI_BITMAP
 	depends on PCI_MSI
 	default y if MPIC
 	default y if FSL_PCI
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+
+config PPC_SCOM
+	bool
+
+config SCOM_DEBUGFS
+	bool "Expose SCOM controllers via debugfs"
+	depends on PPC_SCOM
+	default n
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 1e0c933ef772..6076e0074a87 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -57,3 +57,9 @@ obj-$(CONFIG_PPC_MPC52xx)	+= mpc5xxx_clocks.o
 ifeq ($(CONFIG_SUSPEND),y)
 obj-$(CONFIG_6xx)		+= 6xx-suspend.o
 endif
+
+obj-$(CONFIG_PPC_SCOM)		+= scom.o
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-$(CONFIG_PPC_XICS)		+= xics/
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1636dd896707..bd0d54060b94 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device)
 			AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
 
 	bank->ph_addr = resource.start;
-	bank->io_addr = (unsigned long) ioremap_flags(
+	bank->io_addr = (unsigned long) ioremap_prot(
 			bank->ph_addr, bank->size, _PAGE_NO_CACHE);
 	if (bank->io_addr == 0) {
 		dev_err(&device->dev, "ioremap() failed\n");
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index e0bc944eb23f..350787c83e22 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host;
 
 static void cpm_mask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_end_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec));
 }
@@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void)
 		goto end;
 
 	/* Initialize the CPM interrupt controller. */
-	hwirq = (unsigned int)irq_map[sirq].hwirq;
+	hwirq = (unsigned int)virq_to_hw(sirq);
 	out_be32(&cpic_reg->cpic_cicr,
 	    (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
 		((hwirq/2) << 13) | CICR_HP_MASK);
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index 5495c1be472b..bcab50e2a9eb 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = {
 static void cpm2_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d)
 static void cpm2_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d)
 static void cpm2_ack(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d)
 static void cpm2_end_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d)
 
 static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 54fb1922fe30..116415899176 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev,
 		goto out_free;
 	}
 
-	cache_sram->base_virt = ioremap_flags(cache_sram->base_phys,
+	cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
 				cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
 	if (!cache_sram->base_virt) {
-		dev_err(&dev->dev, "%s: ioremap_flags failed\n",
+		dev_err(&dev->dev, "%s: ioremap_prot failed\n",
 				dev->dev.of_node->full_name);
 		ret = -ENOMEM;
 		goto out_release;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20f..92e78333c47c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
 	list_for_each_entry(entry, &pdev->msi_list, list) {
 		if (entry->irq == NO_IRQ)
 			continue;
-		msi_data = irq_get_handler_data(entry->irq);
+		msi_data = irq_get_chip_data(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		msi_bitmap_free_hwirqs(&msi_data->bitmap,
 				       virq_to_hw(entry->irq), 1);
@@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			rc = -ENOSPC;
 			goto out_free;
 		}
-		irq_set_handler_data(virq, msi_data);
+		/* chip_data is msi_data via host->hostdata in host->map() */
 		irq_set_msi_desc(virq, entry);
 
 		fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
@@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	u32 have_shift = 0;
 	struct fsl_msi_cascade_data *cascade_data;
 
-	cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq);
+	cascade_data = irq_get_handler_data(irq);
 	msi_data = cascade_data->msi_data;
 
 	raw_spin_lock(&desc->lock);
@@ -253,7 +253,7 @@ unlock:
 
 static int fsl_of_msi_remove(struct platform_device *ofdev)
 {
-	struct fsl_msi *msi = ofdev->dev.platform_data;
+	struct fsl_msi *msi = platform_get_drvdata(ofdev);
 	int virq, i;
 	struct fsl_msi_cascade_data *cascade_data;
 
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
 	return 0;
 }
 
+static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
+	const struct of_device_id *match;
 	struct fsl_msi *msi;
 	struct resource res;
 	int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 	u32 offset;
 	static const u32 all_avail[] = { 0, NR_MSI_IRQS };
 
-	if (!dev->dev.of_match)
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	features = dev->dev.of_match->data;
+	features = match->data;
 
 	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
 
@@ -327,7 +330,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "No memory for MSI structure\n");
 		return -ENOMEM;
 	}
-	dev->dev.platform_data = msi;
+	platform_set_drvdata(dev, msi);
 
 	msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR,
 				      NR_MSI_IRQS, &fsl_msi_host_ops, 0);
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index f8f7f28c6343..68ca9290df94 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -324,6 +324,11 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary)
 	struct resource rsrc;
 	const int *bus_range;
 
+	if (!of_device_is_available(dev)) {
+		pr_warning("%s: disabled\n", dev->full_name);
+		return -ENODEV;
+	}
+
 	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
 
 	/* Fetch host bridge registers address */
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 14232d57369c..49798532b477 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -1457,7 +1457,6 @@ int fsl_rio_setup(struct platform_device *dev)
 	port->ops = ops;
 	port->priv = priv;
 	port->phys_efptr = 0x100;
-	rio_register_mport(port);
 
 	priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1);
 	rio_regs_win = priv->regs_win;
@@ -1504,6 +1503,9 @@ int fsl_rio_setup(struct platform_device *dev)
 	dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n",
 			port->sys_size ? 65536 : 256);
 
+	if (rio_register_mport(port))
+		goto err;
+
 	if (port->host_deviceid >= 0)
 		out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST |
 			RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED);
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 142770cb84b6..d18bb27e4df9 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* Make sure irq is masked in hardware */
-	i8259_mask_irq(irq_get_irq_data(virq));
-
-	/* remove chip and handler */
-	irq_set_chip_and_handler(virq, NULL, NULL);
-
-	/* Make sure it's completed */
-	synchronize_irq(virq);
-}
-
 static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
@@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 static struct irq_host_ops i8259_host_ops = {
 	.match = i8259_host_match,
 	.map = i8259_host_map,
-	.unmap = i8259_host_unmap,
 	.xlate = i8259_host_xlate,
 };
 
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index fa438be962b7..7367d17364cb 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -18,7 +18,7 @@
 #include <linux/stddef.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/bootmem.h>
 #include <linux/spinlock.h>
@@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq)
 	return primary_ipic;
 }
 
-#define ipic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void ipic_unmask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d)
 static void ipic_mask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d)
 static void ipic_ack_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d)
 static void ipic_mask_irq_and_ack(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d)
 static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	if (flow_type == IRQ_TYPE_NONE)
@@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
 int ipic_set_priority(unsigned int virq, unsigned int priority)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	if (priority > 7)
@@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority)
 void ipic_set_highest_priority(unsigned int virq)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	temp = ipic_read(ipic->regs, IPIC_SICFR);
@@ -902,7 +900,7 @@ static struct {
 	u32 sercr;
 } ipic_saved_state;
 
-static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
+static int ipic_suspend(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -933,7 +931,7 @@ static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
 	return 0;
 }
 
-static int ipic_resume(struct sys_device *sdev)
+static void ipic_resume(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -949,44 +947,26 @@ static int ipic_resume(struct sys_device *sdev)
 	ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr);
 	ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr);
 	ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr);
-
-	return 0;
 }
 #else
 #define ipic_suspend NULL
 #define ipic_resume NULL
 #endif
 
-static struct sysdev_class ipic_sysclass = {
-	.name = "ipic",
+static struct syscore_ops ipic_syscore_ops = {
 	.suspend = ipic_suspend,
 	.resume = ipic_resume,
 };
 
-static struct sys_device device_ipic = {
-	.id		= 0,
-	.cls		= &ipic_sysclass,
-};
-
-static int __init init_ipic_sysfs(void)
+static int __init init_ipic_syscore(void)
 {
-	int rc;
-
 	if (!primary_ipic || !primary_ipic->regs)
 		return -ENODEV;
-	printk(KERN_DEBUG "Registering ipic with sysfs...\n");
 
-	rc = sysdev_class_register(&ipic_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&device_ipic);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys device\n");
-		return -ENODEV;
-	}
+	printk(KERN_DEBUG "Registering ipic system core operations\n");
+	register_syscore_ops(&ipic_syscore_ops);
+
 	return 0;
 }
 
-subsys_initcall(init_ipic_sysfs);
+subsys_initcall(init_ipic_syscore);
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 207324209065..ddc877a3a23a 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -115,6 +115,8 @@ int __init mmio_nvram_init(void)
 	int ret;
 
 	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
 	if (!nvram_node) {
 		printk(KERN_WARNING "nvram: no node found in device-tree\n");
 		return -ENODEV;
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index a88800ff4d01..20924f2246f0 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs);
 static void mpc8xx_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d)
 static void mpc8xx_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d)
 static void mpc8xx_ack(struct irq_data *d)
 {
 	int	bit;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	out_be32(&siu_reg->sc_sipend, 1 << (31-bit));
@@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d)
 static void mpc8xx_end_irq(struct irq_data *d)
 {
 	int bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d)
 static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	if (flow_type & IRQ_TYPE_EDGE_FALLING) {
-		irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq;
+		irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d);
 		unsigned int siel = in_be32(&siu_reg->sc_siel);
 
 		/* only external IRQ senses are programmable */
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index 0892a2841c2b..fb4963abdf55 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d)
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
 
-	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 }
 
 static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
@@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
 	case IRQ_TYPE_EDGE_FALLING:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		setbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
 	case IRQ_TYPE_EDGE_BOTH:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		clrbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
@@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
-	unsigned long gpio = virq_to_hw(d->irq);
+	unsigned long gpio = irqd_to_hwirq(d);
 	void __iomem *reg;
 	unsigned int shift;
 	unsigned long flags;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f91c065bed5a..3a8de5bb628a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -6,6 +6,7 @@
  *  with various broken implementations of this HW.
  *
  *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *  Copyright 2010-2011 Freescale Semiconductor, Inc.
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of this archive
@@ -27,6 +28,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/ptrace.h>
 #include <asm/signal.h>
@@ -218,6 +220,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu
 	_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
 }
 
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)	/* timers 4-7 sit at an additional fixed offset */
+		offset += 0x1000 / 4;	/* NOTE(review): bytes or words? */
+
+	return _mpic_read(mpic->reg_type, &mpic->tmregs, offset);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)	/* timers 4-7 sit at an additional fixed offset */
+		offset += 0x1000 / 4;	/* NOTE(review): bytes or words? */
+
+	_mpic_write(mpic->reg_type, &mpic->tmregs, offset, value);
+}
+
 static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
 {
 	unsigned int cpu = mpic_processor_id(mpic);
@@ -268,6 +292,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
 #define mpic_write(b,r,v)	_mpic_write(mpic->reg_type,&(b),(r),(v))
 #define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
 #define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i)		_mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v)	_mpic_tm_write(mpic,(i),(v))
 #define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
 #define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
 #define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
@@ -607,8 +633,6 @@ static int irq_choose_cpu(const struct cpumask *mask)
 }
 #endif
 
-#define mpic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq)
 {
@@ -621,11 +645,18 @@ static struct mpic *mpic_find(unsigned int irq)
 /* Determine if the linux irq is an IPI */
 static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
 {
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 
 	return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
 }
 
+/* Tell whether the linux irq's hw number lies in the timer vector range */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq)
+{
+	unsigned int hw = virq_to_hw(irq);
+
+	return !(hw < mpic->timer_vecs[0] || hw > mpic->timer_vecs[7]);
+}
 
 /* Convert a cpu mask from logical to physical cpu numbers. */
 static inline u32 mpic_physmask(u32 cpumask)
@@ -633,7 +664,7 @@ static inline u32 mpic_physmask(u32 cpumask)
 	int i;
 	u32 mask = 0;
 
-	for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
+	for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
 		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
 	return mask;
 }
@@ -674,7 +705,7 @@ void mpic_unmask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
 
@@ -695,7 +726,7 @@ void mpic_mask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
 
@@ -733,7 +764,7 @@ void mpic_end_irq(struct irq_data *d)
 static void mpic_unmask_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 
@@ -744,7 +775,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d)
 static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 	mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d));
@@ -755,7 +786,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 static void mpic_shutdown_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_shutdown_ht_interrupt(mpic, src);
 	mpic_mask_irq(d);
@@ -764,7 +795,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d)
 static void mpic_end_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 #ifdef DEBUG_IRQ
 	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
@@ -785,7 +816,7 @@ static void mpic_end_ht_irq(struct irq_data *d)
 static void mpic_unmask_ipi(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_ipi(d);
-	unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0];
+	unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0];
 
 	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src);
 	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
@@ -812,27 +843,42 @@ static void mpic_end_ipi(struct irq_data *d)
 
 #endif /* CONFIG_SMP */
 
+static void mpic_unmask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, d->irq, src);
+	mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK);
+	mpic_tm_read(src);	/* read back after clearing the mask bit */
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int tm = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	mpic_tm_write(tm, MPIC_VECPRI_MASK | mpic_tm_read(tm));
+	mpic_tm_read(tm);	/* read back after setting the mask bit */
+}
+
 int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 		      bool force)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
 		int cpuid = irq_choose_cpu(cpumask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
 	} else {
-		cpumask_var_t tmp;
-
-		alloc_cpumask_var(&tmp, GFP_KERNEL);
+		u32 mask = cpumask_bits(cpumask)[0];
 
-		cpumask_and(tmp, cpumask, cpu_online_mask);
+		mask &= cpumask_bits(cpu_online_mask)[0];
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
-			       mpic_physmask(cpumask_bits(tmp)[0]));
-
-		free_cpumask_var(tmp);
+			       mpic_physmask(mask));
 	}
 
 	return 0;
@@ -862,7 +908,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
 int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vecpri, vold, vnew;
 
 	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
@@ -898,7 +944,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 void mpic_set_vector(unsigned int virq, unsigned int vector)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	unsigned int vecpri;
 
 	DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
@@ -916,7 +962,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector)
 void mpic_set_destination(unsigned int virq, unsigned int cpuid)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 
 	DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
 	    mpic, virq, src, cpuid);
@@ -942,6 +988,12 @@ static struct irq_chip mpic_ipi_chip = {
 };
 #endif /* CONFIG_SMP */
 
+static struct irq_chip mpic_tm_chip = {	/* template for timer irqs */
+	.irq_eoi	= mpic_end_irq,
+	.irq_unmask	= mpic_unmask_tm,
+	.irq_mask	= mpic_mask_tm,
+};
+
 #ifdef CONFIG_MPIC_U3_HT_IRQS
 static struct irq_chip mpic_irq_ht_chip = {
 	.irq_startup	= mpic_startup_ht_irq,
@@ -985,6 +1037,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
 	}
 #endif /* CONFIG_SMP */
 
+	if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+		WARN_ON(!(mpic->flags & MPIC_PRIMARY));
+
+		DBG("mpic: mapping as timer\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_tm,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
 	if (hw >= mpic->irq_count)
 		return -EINVAL;
 
@@ -1025,6 +1087,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
+	struct mpic *mpic = h->host_data;
 	static unsigned char map_mpic_senses[4] = {
 		IRQ_TYPE_EDGE_RISING,
 		IRQ_TYPE_LEVEL_LOW,
@@ -1033,7 +1096,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 	};
 
 	*out_hwirq = intspec[0];
-	if (intsize > 1) {
+	if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+		/*
+		 * Freescale MPIC with extended intspec:
+		 * First two cells are as usual.  Third specifies
+		 * an "interrupt type".  Fourth is type-specific data.
+		 *
+		 * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+		 */
+		switch (intspec[2]) {
+		case 0:
+		case 1: /* no EISR/EIMR support for now, treat as shared IRQ */
+			break;
+		case 2:
+			if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->ipi_vecs[intspec[0]];
+			break;
+		case 3:
+			if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->timer_vecs[intspec[0]];
+			break;
+		default:
+			pr_debug("%s: unknown irq type %u\n",
+				 __func__, intspec[2]);
+			return -EINVAL;
+		}
+
+		*out_flags = map_mpic_senses[intspec[1] & 3];
+	} else if (intsize > 1) {
 		u32 mask = 0x3;
 
 		/* Apple invented a new race of encoding on machines with
@@ -1109,6 +1203,9 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	mpic->hc_ipi.name = name;
 #endif /* CONFIG_SMP */
 
+	mpic->hc_tm = mpic_tm_chip;
+	mpic->hc_tm.name = name;
+
 	mpic->flags = flags;
 	mpic->isu_size = isu_size;
 	mpic->irq_count = irq_count;
@@ -1119,10 +1216,14 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	else
 		intvec_top = 255;
 
-	mpic->timer_vecs[0] = intvec_top - 8;
-	mpic->timer_vecs[1] = intvec_top - 7;
-	mpic->timer_vecs[2] = intvec_top - 6;
-	mpic->timer_vecs[3] = intvec_top - 5;
+	mpic->timer_vecs[0] = intvec_top - 12;
+	mpic->timer_vecs[1] = intvec_top - 11;
+	mpic->timer_vecs[2] = intvec_top - 10;
+	mpic->timer_vecs[3] = intvec_top - 9;
+	mpic->timer_vecs[4] = intvec_top - 8;
+	mpic->timer_vecs[5] = intvec_top - 7;
+	mpic->timer_vecs[6] = intvec_top - 6;
+	mpic->timer_vecs[7] = intvec_top - 5;
 	mpic->ipi_vecs[0]   = intvec_top - 4;
 	mpic->ipi_vecs[1]   = intvec_top - 3;
 	mpic->ipi_vecs[2]   = intvec_top - 2;
@@ -1132,6 +1233,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	/* Check for "big-endian" in device-tree */
 	if (node && of_get_property(node, "big-endian", NULL) != NULL)
 		mpic->flags |= MPIC_BIG_ENDIAN;
+	if (node && of_device_is_compatible(node, "fsl,mpic"))
+		mpic->flags |= MPIC_FSL;
 
 	/* Look for protected sources */
 	if (node) {
@@ -1323,15 +1426,17 @@ void __init mpic_init(struct mpic *mpic)
 	/* Set current processor priority to max */
 	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
 
-	/* Initialize timers: just disable them all */
+	/* Initialize timers to our reserved vectors and mask them for now */
 	for (i = 0; i < 4; i++) {
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
-			   MPIC_INFO(TIMER_DESTINATION), 0);
+			   MPIC_INFO(TIMER_DESTINATION),
+			   1 << hard_smp_processor_id());
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
 			   MPIC_INFO(TIMER_VECTOR_PRI),
 			   MPIC_VECPRI_MASK |
+			   (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
 			   (mpic->timer_vecs[0] + i));
 	}
 
@@ -1427,7 +1532,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable)
 void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 {
 	struct mpic *mpic = mpic_find(irq);
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 	unsigned long flags;
 	u32 reg;
 
@@ -1440,6 +1545,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 			~MPIC_VECPRI_PRIORITY_MASK;
 		mpic_ipi_write(src - mpic->ipi_vecs[0],
 			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else if (mpic_is_tm(mpic, irq)) {
+		reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
+			~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_tm_write(src - mpic->timer_vecs[0],
+			      reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
 	} else {
 		reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI))
 			& ~MPIC_VECPRI_PRIORITY_MASK;
@@ -1619,46 +1729,28 @@ void mpic_request_ipis(void)
 	}
 }
 
-static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask)
+void smp_mpic_message_pass(int cpu, int msg)
 {
 	struct mpic *mpic = mpic_primary;
+	u32 physmask;
 
 	BUG_ON(mpic == NULL);
 
-#ifdef DEBUG_IPI
-	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
-#endif
-
-	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
-		       ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE),
-		       mpic_physmask(cpumask_bits(cpu_mask)[0]));
-}
-
-void smp_mpic_message_pass(int target, int msg)
-{
-	cpumask_var_t tmp;
-
 	/* make sure we're sending something that translates to an IPI */
 	if ((unsigned int)msg > 3) {
 		printk("SMP %d: smp_message_pass: unknown msg %d\n",
 		       smp_processor_id(), msg);
 		return;
 	}
-	switch (target) {
-	case MSG_ALL:
-		mpic_send_ipi(msg, cpu_online_mask);
-		break;
-	case MSG_ALL_BUT_SELF:
-		alloc_cpumask_var(&tmp, GFP_NOWAIT);
-		cpumask_andnot(tmp, cpu_online_mask,
-			       cpumask_of(smp_processor_id()));
-		mpic_send_ipi(msg, tmp);
-		free_cpumask_var(tmp);
-		break;
-	default:
-		mpic_send_ipi(msg, cpumask_of(target));
-		break;
-	}
+
+#ifdef DEBUG_IPI
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+
+	physmask = 1 << get_hard_smp_processor_id(cpu);
+
+	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
+		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
 }
 
 int __init smp_mpic_probe(void)
@@ -1702,9 +1794,8 @@ void mpic_reset_core(int cpu)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PM
-static int mpic_suspend(struct sys_device *dev, pm_message_t state)
+static void mpic_suspend_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1713,13 +1804,22 @@ static int mpic_suspend(struct sys_device *dev, pm_message_t state)
 		mpic->save_data[i].dest =
 			mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION));
 	}
+}
+
+static int mpic_suspend(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_suspend_one(mpic);
+		mpic = mpic->next;
+	}
 
 	return 0;
 }
 
-static int mpic_resume(struct sys_device *dev)
+static void mpic_resume_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1746,33 +1846,28 @@ static int mpic_resume(struct sys_device *dev)
 	}
 #endif
 	} /* end for loop */
+}
 
-	return 0;
+static void mpic_resume(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_resume_one(mpic);
+		mpic = mpic->next;
+	}
 }
-#endif
 
-static struct sysdev_class mpic_sysclass = {
-#ifdef CONFIG_PM
+static struct syscore_ops mpic_syscore_ops = {
 	.resume = mpic_resume,
 	.suspend = mpic_suspend,
-#endif
-	.name = "mpic",
 };
 
 static int mpic_init_sys(void)
 {
-	struct mpic *mpic = mpics;
-	int error, id = 0;
-
-	error = sysdev_class_register(&mpic_sysclass);
-
-	while (mpic && !error) {
-		mpic->sysdev.cls = &mpic_sysclass;
-		mpic->sysdev.id = id++;
-		error = sysdev_register(&mpic->sysdev);
-		mpic = mpic->next;
-	}
-	return error;
+	register_syscore_ops(&mpic_syscore_ops);
+	return 0;
 }
 
 device_initcall(mpic_init_sys);
+#endif
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index e9c633c7c083..14d130268e7a 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host;
 
 static void mv64x60_mask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d)
 
 static void mv64x60_unmask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = {
 
 static void mv64x60_mask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d)
 
 static void mv64x60_unmask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = {
 
 static void mv64x60_mask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d)
 
 static void mv64x60_mask_ack_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d)
 
 static void mv64x60_unmask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 832d6924ad1c..b2acda07220d 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d)
 	return irq_data_get_irq_chip_data(d);
 }
 
-#define virq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void qe_ic_unmask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d)
 static void qe_ic_mask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
new file mode 100644
index 000000000000..b2593ce30c9b
--- /dev/null
+++ b/arch/powerpc/sysdev/scom.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/scom.h>
+
+const struct scom_controller *scom_controller;
+EXPORT_SYMBOL_GPL(scom_controller);
+
+struct device_node *scom_find_parent(struct device_node *node)
+{
+	struct device_node *cur = of_node_get(node), *next;
+	const u32 *ph;
+
+	/* Follow "scom-parent" links (else tree parents) up to a controller */
+	while (cur && !of_get_property(cur, "scom-controller", NULL)) {
+		ph = of_get_property(cur, "scom-parent", NULL);
+		if (ph)
+			next = of_find_node_by_phandle(*ph);
+		else
+			next = of_get_parent(cur);
+		of_node_put(cur);
+		cur = next;
+	}
+	/* NULL, or the controller node with a reference still held */
+	return cur;
+}
+EXPORT_SYMBOL_GPL(scom_find_parent);
+
+scom_map_t scom_map_device(struct device_node *dev, int index)
+{
+	struct device_node *parent;
+	unsigned int cells, size;
+	const u32 *prop;
+	u64 reg, cnt;
+	scom_map_t ret = 0;
+
+	/* scom_find_parent() hands back a referenced node (or NULL) */
+	parent = scom_find_parent(dev);
+	if (parent == NULL)
+		return 0;
+
+	prop = of_get_property(parent, "#scom-cells", NULL);
+	cells = prop ? *prop : 1;
+
+	prop = of_get_property(dev, "scom-reg", &size);
+	if (!prop)
+		goto out;
+	size >>= 2;	/* property length in bytes -> number of u32 cells */
+
+	/* each "scom-reg" entry is a (reg, cnt) pair, 'cells' cells apiece */
+	if (index >= (size / (2*cells)))
+		goto out;
+
+	reg = of_read_number(&prop[index * cells * 2], cells);
+	cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
+	ret = scom_map(parent, reg, cnt);
+out:
+	of_node_put(parent);	/* drop the lookup reference on every path */
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scom_map_device);
+
+#ifdef CONFIG_SCOM_DEBUGFS
+struct scom_debug_entry {
+	struct device_node *dn;		/* controller node, reference held */
+	unsigned long addr;		/* address of current mapping, else 0 */
+	scom_map_t map;			/* current mapping or SCOM_MAP_INVALID */
+	spinlock_t lock;		/* initialised; not taken in this file */
+	char name[8];			/* debugfs directory name, "scom%d" */
+	struct debugfs_blob_wrapper blob;	/* backs the "path" file */
+};
+
+static int scom_addr_set(void *data, u64 val)
+{
+	struct scom_debug_entry *entry = data;
+
+	/* tear down the previous mapping before trying the new address */
+	entry->addr = 0;
+	scom_unmap(entry->map);
+	entry->map = scom_map(entry->dn, val, 1);
+
+	if (!scom_map_ok(entry->map))
+		return -EFAULT;
+
+	entry->addr = val;
+	return 0;
+}
+
+static int scom_addr_get(void *data, u64 *val)
+{
+	/* report the address of the current mapping (0 if none) */
+	*val = ((struct scom_debug_entry *)data)->addr;
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set,
+			"0x%llx\n");
+
+static int scom_val_set(void *data, u64 val)
+{
+	struct scom_debug_entry *entry = data;
+	int rc = -EFAULT;	/* returned when no mapping is in place */
+
+	if (scom_map_ok(entry->map)) {
+		scom_write(entry->map, 0, val);
+		rc = 0;
+	}
+	return rc;
+}
+
+static int scom_val_get(void *data, u64 *val)
+{
+	struct scom_debug_entry *entry = data;
+	int mapped = scom_map_ok(entry->map);
+
+	/* read offset 0 of the current mapping, if there is one */
+	if (mapped)
+		*val = scom_read(entry->map, 0);
+	return mapped ? 0 : -EFAULT;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set,
+			"0x%llx\n");
+
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+			       int i)
+{
+	struct scom_debug_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	struct dentry *subdir;
+
+	if (!entry)
+		return -ENOMEM;
+
+	/* fill in the per-controller state before exposing it in debugfs */
+	spin_lock_init(&entry->lock);
+	entry->map = SCOM_MAP_INVALID;
+	entry->dn = of_node_get(dn);
+	entry->blob.data = dn->full_name;
+	entry->blob.size = strlen(dn->full_name);
+	snprintf(entry->name, sizeof(entry->name), "scom%d", i);
+
+	subdir = debugfs_create_dir(entry->name, root);
+	if (!subdir) {
+		/* undo the node reference and the allocation taken above */
+		of_node_put(dn);
+		kfree(entry);
+		return -1;
+	}
+
+	debugfs_create_file("addr", 0600, subdir, entry, &scom_addr_fops);
+	debugfs_create_file("value", 0600, subdir, entry, &scom_val_fops);
+	debugfs_create_blob("path", 0400, subdir, &entry->blob);
+	return 0;
+}
+
+static int scom_debug_init(void)
+{
+	struct dentry *top = debugfs_create_dir("scom", powerpc_debugfs_root);
+	struct device_node *np;
+	int idx = 0, err = 0;
+
+	if (!top)
+		return -1;
+
+	/* one "scom%d" directory per node carrying "scom-controller" */
+	for_each_node_with_property(np, "scom-controller") {
+		err |= scom_debug_init_one(top, np, idx);
+		idx++;
+	}
+	return err;
+}
+device_initcall(scom_debug_init);
+#endif /* CONFIG_SCOM_DEBUGFS */
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 5d9138516628..984cd2029158 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -41,8 +41,6 @@
 #define UIC_VR		0x7
 #define UIC_VCR		0x8
 
-#define uic_irq_to_hw(virq)	(irq_map[virq].hwirq)
-
 struct uic *primary_uic;
 
 struct uic {
@@ -58,7 +56,7 @@ struct uic {
 static void uic_unmask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d)
 static void uic_mask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er;
 
@@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d)
 static void uic_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 
 	spin_lock_irqsave(&uic->lock, flags);
@@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d)
 static void uic_mask_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
 static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	int trigger, polarity;
 	u32 tr, pr, mask;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 000000000000..0031eda320c3
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,13 @@
+config PPC_XICS
+       def_bool n
+       select PPC_SMP_MUXED_IPI
+
+config PPC_ICP_NATIVE
+       def_bool n
+
+config PPC_ICP_HV
+       def_bool n
+
+config PPC_ICS_RTAS
+       def_bool n
+
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 000000000000..b75a6059337f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,6 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 000000000000..9518d367a64f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc != H_SUCCESS)
+		panic(" bad return code xirr - rc = %lx\n", rc);
+	return (unsigned int)retbuf[0];
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code EOI - rc = %ld, value=%x\n", rc, value);
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code cppr - rc = %lx\n", rc);
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu),
+				     value);
+	if (rc != H_SUCCESS)
+		panic("bad return code qirr - rc = %lx\n", rc);
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but never return, so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu, unsigned long data)
+{
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.cause_ipi	= icp_hv_cause_ipi,
+#endif
+};
+
+int icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+	/* Only the node's presence matters: drop the lookup's reference */
+	of_node_put(np);
+	icp_ops = &icp_hv_ops;
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 000000000000..1f15ad436140
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but never return, so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu, unsigned long data)
+{
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_native_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+/* Reserve and ioremap the ICP registers of the present CPU matching hw_id */
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* No hard -> linux processor id lookup exists yet, so scan for it */
+	for_each_possible_cpu(i)
+		if (cpu_present(i) && hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+	if (!rname)
+		return -ENOMEM;
+
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warning("icp_native: Could not reserve ICP MMIO"
+			   " for CPU %d, interrupt server #0x%x\n",
+			   cpu, hw_id);
+		kfree(rname);	/* no region was created to own the name */
+		return -EBUSY;
+	}
+
+	icp_native_regs[cpu] = ioremap(addr, size);
+	if (!icp_native_regs[cpu]) {
+		pr_warning("icp_native: Failed ioremap for CPU %d, "
+			   "interrupt server #0x%x, addr %#lx\n",
+			   cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		kfree(rname);	/* region released, name is unreferenced */
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Map the ICP registers for every "reg" entry of one ICP device node */
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const u32 *ireg;
+	int i, reg_tuple_size;
+	int num_servers = 0;
+
+	/* This code makes the theoretically broken assumption that the
+	 * interrupt server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+	/* Does that ever happen? We'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	ireg = of_get_property(np, "reg", &ilen);
+	if (!ireg) {
+		pr_err("icp_native: Can't find interrupt reg property\n");
+		return -1;
+	}
+
+	reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4;
+	if (((ilen % reg_tuple_size) != 0)
+	    || (num_servers && (num_servers != (ilen / reg_tuple_size)))) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)\n",
+		       ilen / reg_tuple_size, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < (ilen / reg_tuple_size); i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		/* r.end is inclusive, so the length is end - start + 1 */
+		if (icp_native_map_one_cpu(*indx, r.start, resource_size(&r)))
+			return -1;
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.cause_ipi	= icp_native_cause_ipi,
+#endif
+};
+
+/* Probe every ICP node; select the native ICP ops if one set up cleanly */
+int icp_native_init(void)
+{
+	struct device_node *node;
+	int have_icp = 0;
+	u32 server = 0;
+
+	for_each_compatible_node(node, NULL, "ibm,ppc-xicp")
+		if (!icp_native_init_one_node(node, &server))
+			have_icp = 1;
+
+	/* Nothing by compatible: fall back to matching on device type */
+	if (!have_icp) {
+		for_each_node_by_type(node,
+				"PowerPC-External-Interrupt-Presentation")
+			if (!icp_native_init_one_node(node, &server))
+				have_icp = 1;
+	}
+	if (!have_icp)
+		return -ENODEV;
+
+	icp_ops = &icp_native_ops;
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 000000000000..c782f85cf7e4
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,240 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq);
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.map		= ics_rtas_map,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+};
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int call_status;
+	int server;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, d->affinity, 0);
+
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
+	if (call_status != 0) {
+		printk(KERN_ERR
+			"%s: ibm_set_xive irq %u server %x returned %d\n",
+			__func__, hw_irq, server, call_status);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (d->msi_desc)
+		unmask_msi_irq(d);
+#endif
+	/* unmask it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int call_status;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int status;
+	int xics_status[2];
+	int irq_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (irq_server == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		printk(KERN_WARNING
+			"%s: No online cpus in the mask %s for irq %d\n",
+			__func__, cpulist, d->irq);
+		return -1;
+	}
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_rtas_set_affinity
+};
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq)
+{
+	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
+	int status[2];
+	int rc;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt */
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	if (rc)
+		return -ENXIO;
+
+	irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
+	irq_set_chip_data(virq, &ics_rtas);
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int rc, status[2];
+
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	if (rc)
+		return -1;
+	return status[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+int ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_token("ibm,get-xive");
+	ibm_set_xive = rtas_token("ibm,set-xive");
+	ibm_int_on  = rtas_token("ibm,int-on");
+	ibm_int_off = rtas_token("ibm,int-off");
+
+	/* We enable the RTAS "ICS" if RTAS is present with the
+	 * appropriate tokens
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 000000000000..445c5a01b766
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_host *xics_host;
+
+static LIST_HEAD(ics_list);
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const u32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* Global interrupt distribution server is specified in the last
+	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+	 * entry from this property for current boot cpu id and use it as
+	 * default distribution server
+	 */
+	for (j = 0; j < i; j += 2) {
+		if (ireg[j] == hcpuid) {
+			xics_default_distrib_server = ireg[j+1];
+			break;
+		}
+	}
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
+	of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	struct ics *ics;
+
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+	list_for_each_entry(ics, &ics_list, link)
+		ics->mask_unknown(ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void xics_request_ipi(void)
+{
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI);
+	BUG_ON(ipi == NO_IRQ);
+
+	/*
+	 * IPIs are marked IRQF_DISABLED as they must run with irqs
+	 * disabled, and PERCPU.  The handler was set in map.
+	 */
+	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+			   IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL));
+}
+
+int __init xics_smp_probe(void)
+{
+	/* Setup cause_ipi callback  based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
+
+	/* Register all the IPIs */
+	xics_request_ipi();
+
+	return cpumask_weight(cpu_possible_mask);
+}
+
+#endif /* CONFIG_SMP */
+
+void xics_teardown_cpu(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	/* Allow IPIs again... */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+	for_each_irq(virq) {
+		struct irq_desc *desc;
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct ics *ics;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NUM_ISA_INTERRUPTS)
+			continue;
+		if (!virq_is_host(virq, xics_host))
+			continue;
+		irq = (unsigned int)virq_to_hw(virq);
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		desc = irq_to_desc(virq);
+		/* We only need to migrate enabled IRQS */
+		if (!desc || !desc->action)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Locate interrupt server */
+		server = -1;
+		ics = irq_get_chip_data(virq);
+		if (ics)
+			server = ics->get_server(ics, irq);
+		if (server < 0) {
+			printk(KERN_ERR "%s: Can't find server for irq %d\n",
+			       __func__, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warning("IRQ %u affinity broken off cpu %u\n",
+			       virq, cpu);
+
+		/* Reset affinity to all cpus */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (server < nr_cpu_ids)
+			return get_hard_smp_processor_id(server);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_distrib_server;
+
+	return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_host *h, struct device_node *node)
+{
+	struct ics *ics;
+
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->host_match(ics, node))
+			return 1;
+
+	return 0;
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ics *ics;
+
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+	/* Insert the interrupt mapping into the radix tree for fast lookup */
+	irq_radix_revmap_insert(xics_host, virq, hw);
+
+	/* They aren't all level sensitive but we just don't really know */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+
+	/* Don't call into ICS for IPIs */
+	if (hw == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_percpu_irq);
+		return 0;
+	}
+
+	/* Let the ICS setup the chip data */
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->map(ics, virq) == 0)
+			return 0;
+
+	return -EINVAL;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Current xics implementation translates everything
+	 * to level. It is not technically right for MSIs but this
+	 * is irrelevant at this point. We might get smarter in the future
+	 */
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+static struct irq_host_ops xics_host_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
+				   XICS_IRQ_SPURIOUS);
+	BUG_ON(xics_host == NULL);
+	irq_set_default_host(xics_host);
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	list_add(&ics->link, &ics_list);
+}
+
+static void __init xics_get_server_size(void)
+{
+	struct device_node *np;
+	const u32 *isize;
+
+	/* We fetch the interrupt server size from the first ICS node
+	 * we find if any; otherwise the current value is left untouched
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!np)
+		return;
+	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+	if (isize)
+		xics_interrupt_server_size = *isize;
+	/* Drop the node reference whether or not the property was found */
+	of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* First locate ICP */
+#ifdef CONFIG_PPC_ICP_HV
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+#endif
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (rc < 0)
+		rc = icp_native_init();
+#endif
+	if (rc < 0) {
+		pr_warning("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS */
+#ifdef CONFIG_PPC_ICS_RTAS
+	rc = ics_rtas_init();
+#endif
+	if (rc < 0)
+		pr_warning("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	xics_init_host();
+	xics_setup_cpu();
+}
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 0a13fc19e287..6183799754af 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = {
  */
 static void xilinx_intc_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("mask: %d\n", irq);
 	out_be32(regs + XINTC_CIE, 1 << irq);
@@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type)
  */
 static void xilinx_intc_level_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = {
  */
 static void xilinx_intc_edge_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void *regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d)
 
 static void xilinx_intc_edge_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("ack: %d\n", irq);
 	out_be32(regs + XINTC_IAR, 1 << irq);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33794c1d92c3..42541bbcc7fa 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -334,7 +334,7 @@ static void release_output_lock(void)
 
 int cpus_are_in_xmon(void)
 {
-	return !cpus_empty(cpus_in_xmon);
+	return !cpumask_empty(&cpus_in_xmon);
 }
 #endif
 
@@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
 #ifdef CONFIG_SMP
 	cpu = smp_processor_id();
-	if (cpu_isset(cpu, cpus_in_xmon)) {
+	if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		get_output_lock();
 		excprint(regs);
 		printf("cpu 0x%x: Exception %lx %s in xmon, "
@@ -396,10 +396,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	}
 
 	xmon_fault_jmp[cpu] = recurse_jmp;
-	cpu_set(cpu, cpus_in_xmon);
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
 
 	bp = NULL;
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
 		bp = at_breakpoint(regs->nip);
 	if (bp || unrecoverable_excp(regs))
 		fromipi = 0;
@@ -437,10 +437,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		xmon_owner = cpu;
 		mb();
 		if (ncpus > 1) {
-			smp_send_debugger_break(MSG_ALL_BUT_SELF);
+			smp_send_debugger_break();
 			/* wait for other cpus to come in */
 			for (timeout = 100000000; timeout != 0; --timeout) {
-				if (cpus_weight(cpus_in_xmon) >= ncpus)
+				if (cpumask_weight(&cpus_in_xmon) >= ncpus)
 					break;
 				barrier();
 			}
@@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
  leave:
-	cpu_clear(cpu, cpus_in_xmon);
+	cpumask_clear_cpu(cpu, &cpus_in_xmon);
 	xmon_fault_jmp[cpu] = NULL;
 #else
 	/* UP is simple... */
@@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
 #else
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = at_breakpoint(regs->nip);
 		if (bp != NULL) {
 			int stepped = emulate_step(regs, bp->instr[0]);
@@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs)
 	struct bpt *bp;
 	unsigned long offset;
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 
 	/* Are we at the trap at bp->instr[1] for some bp? */
@@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_dabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (dabr.enabled == 0)
 		return 0;
@@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs)
 
 static int xmon_iabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (iabr == NULL)
 		return 0;
@@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs)
 static int xmon_ipi(struct pt_regs *regs)
 {
 #ifdef CONFIG_SMP
-	if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon))
+	if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon))
 		xmon_core(regs, 1);
 #endif
 	return 0;
@@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
 	if (in_xmon && catch_memory_errors)
 		handle_fault(regs);	/* doesn't return */
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = in_breakpoint_table(regs->nip, &offset);
 		if (bp != NULL) {
 			regs->nip = bp->address + offset;
@@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs)
 	int stepped;
 
 	/* check we are in 64-bit kernel mode, translation enabled */
-	if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) {
+	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
 		if (mread(regs->nip, &instr, 4) == 4) {
 			stepped = emulate_step(regs, instr);
 			if (stepped < 0) {
@@ -976,7 +976,7 @@ static int cpu_cmd(void)
 		printf("cpus stopped:");
 		count = 0;
 		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-			if (cpu_isset(cpu, cpus_in_xmon)) {
+			if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 				if (count == 0)
 					printf(" %x", cpu);
 				++count;
@@ -992,7 +992,7 @@ static int cpu_cmd(void)
 		return 0;
 	}
 	/* try to switch to cpu specified */
-	if (!cpu_isset(cpu, cpus_in_xmon)) {
+	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		printf("cpu 0x%x isn't in xmon\n", cpu);
 		return 0;
 	}
@@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp)
 #endif
 	printf("pc  = ");
 	xmon_print_symbol(fp->nip, " ", "\n");
+	if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) {
+		printf("cfar= ");
+		xmon_print_symbol(fp->orig_gpr3, " ", "\n");
+	}
 	printf("lr  = ");
 	xmon_print_symbol(fp->link, " ", "\n");
 	printf("msr = "REG"   cr  = %.8lx\n", fp->msr, fp->ccr);
@@ -2663,7 +2667,7 @@ static void dump_stab(void)
 
 void dump_segments(void)
 {
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		dump_slb();
 	else
 		dump_stab();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2508a6f31588..4a7f14079e03 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -88,6 +88,7 @@ config S390
 	select HAVE_KERNEL_XZ
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
+	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 975e3ab13cb5..8b16c479585b 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -76,7 +76,7 @@ static void prng_seed(int nbytes)
 
 	/* Add the entropy */
 	while (nbytes >= 8) {
-		*((__u64 *)parm_block) ^= *((__u64 *)buf+i*8);
+		*((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
 		prng_add_entropy();
 		i += 8;
 		nbytes -= 8;
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
index 43a5c78046db..3e20383d0921 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/cacheflush.h
@@ -11,5 +11,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable);
 int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
 
 #endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 72b2e2f2d32d..7e91c58072e2 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -9,9 +9,22 @@
 #define _ASM_S390_DIAG_H
 
 /*
- * Diagnose 10: Release pages
+ * Diagnose 10: Release page range
  */
-extern void diag10(unsigned long addr);
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+	unsigned long start_addr, end_addr;
+
+	start_addr = start_pfn << PAGE_SHIFT;	/* address of the first page */
+	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; /* last page; needs num_pfn > 0 */
+
+	asm volatile(
+		"0:	diag	%0,%1,0x10\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		EX_TABLE(1b, 1b)
+		: : "a" (start_addr), "a" (end_addr));
+}
 
 /*
  * Diagnose 14: Input spool file manipulation
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3c29be4836ed..b7931faaef6d 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,15 +11,13 @@ struct dyn_arch_ftrace { };
 
 #ifdef CONFIG_64BIT
 #define MCOUNT_INSN_SIZE  12
-#define MCOUNT_OFFSET	   8
 #else
 #define MCOUNT_INSN_SIZE  20
-#define MCOUNT_OFFSET	   4
 #endif
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
-	return addr - MCOUNT_OFFSET;
+	return addr;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 000000000000..95a6cf2b5b67
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_64BIT
+#define ASM_PTR ".quad"
+#define ASM_ALIGN ".balign 8"
+#else
+#define ASM_PTR ".long"
+#define ASM_ALIGN ".balign 4"
+#endif
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("0:	brcl 0,0\n"
+		".pushsection __jump_table, \"aw\"\n"
+		ASM_ALIGN "\n"
+		ASM_PTR " 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (key) : : label);
+	return false;	/* fell through the brcl 0,0 at 0: */
+label:
+	return true;	/* entered via label (see __jump_table entry) */
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;	/* address of the patchable instruction (0b) */
+	jump_label_t target;	/* address of the branch label */
+	jump_label_t key;	/* key passed to arch_static_branch() */
+};
+
+#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index a6f0e7cc9cde..8c277caa8d3a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	if (current->mm->context.alloc_pgste) {
+	if (current->mm && current->mm->context.alloc_pgste) {
 		/*
 		 * alloc_pgste indicates, that any NEW context will be created
 		 * with extended page tables. The old context is unchanged. The
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 64230bc392fa..5ff15dacb571 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o \
 	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
 	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
-	    vdso.o vtime.o sysinfo.o nmi.o sclp.o
+	    vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index c032d11da8a1..8237fc07ac79 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -9,27 +9,6 @@
 #include <asm/diag.h>
 
 /*
- * Diagnose 10: Release pages
- */
-void diag10(unsigned long addr)
-{
-	if (addr >= 0x7ff00000)
-		return;
-	asm volatile(
-#ifdef CONFIG_64BIT
-		"	sam31\n"
-		"	diag	%0,%0,0x10\n"
-		"0:	sam64\n"
-#else
-		"	diag	%0,%0,0x10\n"
-		"0:\n"
-#endif
-		EX_TABLE(0b, 0b)
-		: : "a" (addr));
-}
-EXPORT_SYMBOL(diag10);
-
-/*
  * Diagnose 14: Input spool file manipulation
  */
 int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index c83726c9fe03..3d4a78fc1adc 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -672,6 +672,7 @@ static struct insn opcode_b2[] = {
 	{ "rp", 0x77, INSTR_S_RD },
 	{ "stcke", 0x78, INSTR_S_RD },
 	{ "sacf", 0x79, INSTR_S_RD },
+	{ "spp", 0x80, INSTR_S_RD },
 	{ "stsi", 0x7d, INSTR_S_RD },
 	{ "srnm", 0x99, INSTR_S_RD },
 	{ "stfpc", 0x9c, INSTR_S_RD },
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 648f64239a9d..1b67fc6ebdc2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -836,7 +836,7 @@ restart_base:
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
 	basr	%r14,0
 	l	%r14,restart_addr-.(%r14)
-	br	%r14			# branch to start_secondary
+	basr	%r14,%r14		# branch to start_secondary
 restart_addr:
 	.long	start_secondary
 	.align	8
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9d3603d6c511..9fd864563499 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -841,7 +841,7 @@ restart_base:
 	mvc	__LC_SYSTEM_TIMER(8),__TI_system_timer(%r1)
 	xc	__LC_STEAL_TIMER(8),__LC_STEAL_TIMER
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
-	jg	start_secondary
+	brasl	%r14,start_secondary
 	.align	8
 restart_vtime:
 	.long	0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000000..44cc06bedf77
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,59 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+	u16 opcode;
+	s32 offset;
+} __packed;
+
+struct insn_args {
+	unsigned long *target;
+	struct insn *insn;
+	ssize_t size;
+};
+
+static int __arch_jump_label_transform(void *data)
+{
+	struct insn_args *args = data;	/* from arch_jump_label_transform() */
+	int rc;
+
+	rc = probe_kernel_write(args->target, args->insn, args->size);
+	WARN_ON_ONCE(rc < 0);	/* a failed write only warns (once) */
+	return 0;	/* always 0, even if the write failed */
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	struct insn_args args;
+	struct insn insn;	/* 2-byte opcode + 4-byte offset, packed */
+
+	if (type == JUMP_LABEL_ENABLE) {
+		/* brcl 15,offset: branch from entry->code to entry->target */
+		insn.opcode = 0xc0f4;
+		insn.offset = (entry->target - entry->code) >> 1; /* bytes / 2 */
+	} else {
+		/* brcl 0,0: as emitted by arch_static_branch() */
+		insn.opcode = 0xc004;
+		insn.offset = 0;
+	}
+
+	args.target = (void *) entry->code;	/* address to overwrite */
+	args.insn = &insn;
+	args.size = JUMP_LABEL_NOP_SIZE;
+
+	stop_machine(__arch_jump_label_transform, &args, NULL);
+}
+
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 63a97db83f96..63c7d9ff220d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
 	/*
 	 * handle bit signal external calls
-	 *
-	 * For the ec_schedule signal we have to do nothing. All the work
-	 * is done automatically when we return from the interrupt.
 	 */
 	bits = xchg(&S390_lowcore.ext_call_fast, 0);
 
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+
 	if (test_bit(ec_call_function, &bits))
 		generic_smp_call_function_interrupt();
 
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index 7e9d30d567b0..ab0e041ac54c 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -48,10 +48,10 @@ sie_irq_handler:
 	tm	__TI_flags+7(%r2),_TIF_EXIT_SIE
 	jz	0f
 	larl	%r2,sie_exit			# work pending, leave sie
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 	br	%r14
 0:	larl	%r2,sie_reenter			# re-enter with guest id
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 1:	br	%r14
 
 /*
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index c66ffd8dbbb7..1f1dba9dcf58 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10(addr);
+		diag10_range(addr >> PAGE_SHIFT, 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9217e332b118..ab988135e5c6 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -543,7 +543,6 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	struct task_struct *tsk;
 	__u16 subcode;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 	/*
 	 * Get the external interruption subcode & pfault
 	 * initial/completion signal bit. VM stores this 
@@ -553,14 +552,15 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
 		return;
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 
 	/*
 	 * Get the token (= address of the task structure of the affected task).
 	 */
 #ifdef CONFIG_64BIT
-	tsk = *(struct task_struct **) param64;
+	tsk = (struct task_struct *) param64;
 #else
-	tsk = *(struct task_struct **) param32;
+	tsk = (struct task_struct *) param32;
 #endif
 
 	if (subcode & 0x0080) {
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 122ffbd08ce0..f05edcc3beff 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -24,12 +24,13 @@ static void change_page_attr(unsigned long addr, int numpages,
 			WARN_ON_ONCE(1);
 			continue;
 		}
-		ptep = pte_offset_kernel(pmdp, addr + i * PAGE_SIZE);
+		ptep = pte_offset_kernel(pmdp, addr);
 
 		pte = *ptep;
 		pte = set(pte);
-		ptep_invalidate(&init_mm, addr + i * PAGE_SIZE, ptep);
+		ptep_invalidate(&init_mm, addr, ptep);
 		*ptep = pte;
+		addr += PAGE_SIZE;
 	}
 }
 
@@ -53,3 +54,8 @@ int set_memory_nx(unsigned long addr, int numpages)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(set_memory_nx);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return 0;	/* no-op: both arguments are ignored */
+}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 4952872d6f0a..33cbd373cce4 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -1021,20 +1021,14 @@ deallocate_exit:
 	return rc;
 }
 
-long hwsampler_query_min_interval(void)
+unsigned long hwsampler_query_min_interval(void)
 {
-	if (min_sampler_rate)
-		return min_sampler_rate;
-	else
-		return -EINVAL;
+	return min_sampler_rate;
 }
 
-long hwsampler_query_max_interval(void)
+unsigned long hwsampler_query_max_interval(void)
 {
-	if (max_sampler_rate)
-		return max_sampler_rate;
-	else
-		return -EINVAL;
+	return max_sampler_rate;
 }
 
 unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 8c72b59316b5..1912f3bb190c 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -102,8 +102,8 @@ int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
 int hwsampler_deallocate(void);
-long hwsampler_query_min_interval(void);
-long hwsampler_query_max_interval(void);
+unsigned long hwsampler_query_min_interval(void);
+unsigned long hwsampler_query_max_interval(void);
 int hwsampler_start_all(unsigned long interval);
 int hwsampler_stop_all(void);
 int hwsampler_deactivate(unsigned int cpu);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index c63d7e58352b..5995e9bc72d9 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -145,15 +145,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 	 * create hwsampler files only if hwsampler_setup() succeeds.
 	 */
 	oprofile_min_interval = hwsampler_query_min_interval();
-	if (oprofile_min_interval < 0) {
-		oprofile_min_interval = 0;
+	if (oprofile_min_interval == 0)
 		return -ENODEV;
-	}
 	oprofile_max_interval = hwsampler_query_max_interval();
-	if (oprofile_max_interval < 0) {
-		oprofile_max_interval = 0;
+	if (oprofile_max_interval == 0)
 		return -ENODEV;
-	}
 
 	if (oprofile_timer_init(ops))
 		return -ENODEV;
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4b89da248d17..bc439de48cd1 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,7 +24,6 @@ config SUPERH
 	select RTC_LIB
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
-	select ARCH_NO_SYSDEV_OPS
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index e71a531f1e31..77ec0e7b8ddf 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -48,7 +48,6 @@ CONFIG_PREEMPT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index dc4a2eb6a616..c41650572d79 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -83,7 +83,6 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 14726eef1ce0..f0907995b4c9 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/io.h>
+#include <linux/prefetch.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <cpu/sq.h>
diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
index 6dcb8166a64d..22db127afa7b 100644
--- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c
+++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
@@ -139,7 +139,7 @@ void platform_pm_runtime_suspend_idle(void)
 	queue_work(pm_wq, &hwblk_work);
 }
 
-int platform_pm_runtime_suspend(struct device *dev)
+static int default_platform_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
@@ -147,7 +147,7 @@ int platform_pm_runtime_suspend(struct device *dev)
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -183,20 +183,20 @@ int platform_pm_runtime_suspend(struct device *dev)
 	mutex_unlock(&ad->mutex);
 
 out:
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		 __func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_resume(struct device *dev)
+static int default_platform_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -228,19 +228,19 @@ int platform_pm_runtime_resume(struct device *dev)
 	 */
 	mutex_unlock(&ad->mutex);
 out:
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		__func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	int hwblk = pdev->archdata.hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -252,10 +252,19 @@ int platform_pm_runtime_idle(struct device *dev)
 	/* suspend synchronously to disable clocks immediately */
 	ret = pm_runtime_suspend(dev);
 out:
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d] done!\n", hwblk);
+	dev_dbg(dev, "%s() [%d] done!\n", __func__, hwblk);
 	return ret;
 }
 
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = default_platform_runtime_suspend,
+		.runtime_resume = default_platform_runtime_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
+
 static int platform_bus_notify(struct notifier_block *nb,
 			       unsigned long action, void *data)
 {
@@ -276,6 +285,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		hwblk_disable(hwblk_info, hwblk);
 		/* make sure driver re-inits itself once */
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
+		dev->pwr_domain = &default_power_domain;
 		break;
 	/* TODO: add BUS_NOTIFY_BIND_DRIVER and increase idle count */
 	case BUS_NOTIFY_BOUND_DRIVER:
@@ -289,6 +299,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
 		break;
 	case BUS_NOTIFY_DEL_DEVICE:
+		dev->pwr_domain = NULL;
 		break;
 	}
 	return 0;
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 2130ca674e9b..3d7b209b2178 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -117,7 +117,11 @@ void user_enable_single_step(struct task_struct *child)
 
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 
+	if (ptrace_get_breakpoints(child) < 0)
+		return;
+
 	set_single_step(child, pc);
+	ptrace_put_breakpoints(child);
 }
 
 void user_disable_single_step(struct task_struct *child)
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 509b36b45115..6207561ea34a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case SMP_MSG_RESCHEDULE:
+		scheduler_ipi();
 		break;
 	case SMP_MSG_FUNCTION_SINGLE:
 		generic_smp_call_function_single_interrupt();
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 3484c2f65aba..b51a17104b5f 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err)
 	bust_spinlocks(1);
 
 	printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
-	sysfs_printk_last_file();
 	print_modules();
 	show_regs(regs);
 
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 427d4684e0d2..fc73a82366f8 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,17 +7,20 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:\n\t"				\
-			 "nop\n\t"				\
-			 "nop\n\t"				\
-			 ".pushsection __jump_table,  \"a\"\n\t"\
-			 ".align 4\n\t"				\
-			 ".word 1b, %l[" #label "], %c0\n\t"	\
-			 ".popsection \n\t"			\
-			 : :  "i" (key) :  : label);\
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:\n\t"
+		 "nop\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (key) : : l_yes);
+	return false;	/* fell through both nops */
+l_yes:
+	return true;	/* entered via the l_yes label */
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 1c79f32734a0..8b9c556d630b 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -65,6 +65,10 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
-#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
+extern cpumask_t cpu_core_map[NR_CPUS];	/* indexed by CPU number */
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_core_map[cpu];
+}
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index f679c57644d5..1e34f29e58bb 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata apc_match[] = {
+static struct of_device_id apc_match[] = {
 	{
 		.name = APC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 948068a083fc..d1840dbdaa2f 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
 	sabre_scan_bus(pbm, &op->dev);
 }
 
+static const struct of_device_id sabre_match[];
 static int __devinit sabre_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct linux_prom64_registers *pr_regs;
 	struct device_node *dp = op->dev.of_node;
 	struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
 	const u32 *vdma;
 	u64 clear_irq;
 
-	hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+	match = of_match_device(sabre_match, &op->dev);
+	hummingbird_p = match && (match->data != NULL);
 	if (!hummingbird_p) {
 		struct device_node *cpu_dp;
 
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index fecfcb2063c8..283fbc329a43 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1458,11 +1458,15 @@ out_err:
 	return err;
 }
 
+static const struct of_device_id schizo_match[];
 static int __devinit schizo_probe(struct platform_device *op)
 {
-	if (!op->dev.of_match)
+	const struct of_device_id *match;
+
+	match = of_match_device(schizo_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+	return __schizo_init(op, (unsigned long)match->data);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 93d7b4465f8d..6a585d393580 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata pmc_match[] = {
+static struct of_device_id pmc_match[] = {
 	{
 		.name = PMC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb70858..442286d83435 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 void __cpuinit smp_store_cpu_info(int id)
 {
 	int cpu_node;
+	int mid;
 
 	cpu_data(id).udelay_val = loops_per_jiffy;
 
@@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 	cpu_data(id).prom_node = cpu_node;
-	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+	mid = cpu_get_hwmid(cpu_node);
 
-	if (cpu_data(id).mid < 0)
-		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+	if (mid < 0) {
+		printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08x\n", id, cpu_node);
+		mid = 0;	/* not fatal: fall back to MID 0 */
+	}
+	cpu_data(id).mid = mid;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -125,7 +129,9 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 
 void smp_send_reschedule(int cpu)
 {
-	/* See sparc64 */
+	/*
+	 * XXX missing reschedule IPI, see scheduler_ipi()
+	 */
 }
 
 void smp_send_stop(void)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 3e94a8c23238..9478da7fdb3e 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu)
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	scheduler_ipi();
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 4e236391b635..96046a4024c2 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata clock_match[] = {
+static struct of_device_id clock_match[] = {
 	{
 		.name = "eeprom",
 	},
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 3632cb34e914..0084c3361e15 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -289,10 +289,16 @@ cc_end_cruft:
 
 	/* Also, handle the alignment code out of band. */
 cc_dword_align:
-	cmp	%g1, 6
-	bl,a	ccte
+	cmp	%g1, 16
+	bge	1f
+	 srl	%g1, 1, %o3
+2:	cmp	%o3, 0
+	be,a	ccte
 	 andcc	%g1, 0xf, %o3
-	andcc	%o0, 0x1, %g0
+	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
+	be,a	2b
+	 srl	%o3, 1, %o3
+1:	andcc	%o0, 0x1, %g0
 	bne	ccslow
 	 andcc	%o0, 0x2, %g0
 	be	1f
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a4293102ef81..c52224d5ed45 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
-	/*
-	 * Nothing to do here; when we return from interrupt, the
-	 * rescheduling will occur there. But do bump the interrupt
-	 * profiler count in the meantime.
-	 */
 	__get_cpu_var(irq_stat).irq_resched_count++;
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index 90a438acbfaf..b5e675e370c6 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -47,7 +47,7 @@ config HOSTFS
 
 config HPPFS
 	tristate "HoneyPot ProcFS (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && PROC_FS
 	help
 	  hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
 	  entries to be overridden, removed, or fabricated from the host.
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
index 02fb017fed47..a9da516a5274 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -4,6 +4,10 @@ menu "UML-specific options"
 
 menu "Host processor type and features"
 
+config CMPXCHG_LOCAL
+	bool
+	default n
+
 source "arch/x86/Kconfig.cpu"
 
 endmenu
diff --git a/arch/um/include/asm/bug.h b/arch/um/include/asm/bug.h
new file mode 100644
index 000000000000..9e33b864c359
--- /dev/null
+++ b/arch/um/include/asm/bug.h
@@ -0,0 +1,6 @@
+#ifndef __UM_BUG_H
+#define __UM_BUG_H
+
+#include <asm-generic/bug.h>
+
+#endif
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index e2cf786bda0a..5bd1bad33fab 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -49,7 +49,10 @@ static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
 	unsigned long mask = THREAD_SIZE - 1;
-	ti = (struct thread_info *) (((unsigned long) &ti) & ~mask);
+	void *p;
+
+	asm volatile ("" : "=r" (p) : "0" (&ti));
+	ti = (struct thread_info *) (((unsigned long)p) & ~mask);
 	return ti;
 }
 
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 106bf27e2a9a..eefb107d2d73 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
 			break;
 
 		case 'R':
-			set_tsk_need_resched(current);
+			scheduler_ipi();
 			break;
 
 		case 'S':
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 6ea77979531c..42827cafa6af 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -5,6 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <string.h>
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len)
 		 host.release, host.version, host.machine);
 }
 
+/*
+ * glibc's abort() is unusable here: it relies on tgkill(), which has no
+ * effect within UML's kernel threads, and afterwards executes an invalid
+ * instruction to kill the caller, so UML would crash with SIGSEGV instead.
+ * Flush stdio, unblock SIGABRT and send it to our own pid by hand.
+ */
+static inline void __attribute__ ((noreturn)) uml_abort(void)
+{
+	sigset_t abrt_set;
+
+	fflush(NULL);
+	if (sigemptyset(&abrt_set) == 0 && sigaddset(&abrt_set, SIGABRT) == 0)
+		sigprocmask(SIG_UNBLOCK, &abrt_set, 0);
+
+	/* Keep sending SIGABRT; give up with status 127 once kill() fails. */
+	while (kill(getpid(), SIGABRT) >= 0)
+		;
+	exit(127);
+}
+
 void os_dump_core(void)
 {
 	int pid;
@@ -116,5 +137,5 @@ void os_dump_core(void)
 	while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0)
 		os_kill_ptraced_process(pid, 0);
 
-	abort();
+	uml_abort();
 }
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 804b28dd0328..b1da91c1b200 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -4,7 +4,7 @@
 
 obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
-	sys_call_table.o tls.o
+	sys_call_table.o tls.o atomic64_cx8_32.o
 
 obj-$(CONFIG_BINFMT_ELF) += elfcore.o
 
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
new file mode 100644
index 000000000000..1e901d3d4a95
--- /dev/null
+++ b/arch/um/sys-i386/atomic64_cx8_32.S
@@ -0,0 +1,225 @@
+/*
+ * atomic64_t for 586+
+ *
+ * Copied from arch/x86/lib/atomic64_cx8_32.S
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+
+.macro SAVE reg
+	pushl_cfi %\reg
+	CFI_REL_OFFSET \reg, 0
+.endm
+
+.macro RESTORE reg
+	popl_cfi %\reg
+	CFI_RESTORE \reg
+.endm
+
+.macro read64 reg
+	movl %ebx, %eax
+	movl %ecx, %edx
+/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
+	LOCK_PREFIX
+	cmpxchg8b (\reg)
+.endm
+
+ENTRY(atomic64_read_cx8)
+	CFI_STARTPROC
+/* in: %ecx = address of the 64-bit value; out: %edx:%eax = its contents */
+	read64 %ecx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_read_cx8)
+
+ENTRY(atomic64_set_cx8)
+	CFI_STARTPROC
+/* in: %esi = address, %ecx:%ebx = new value; %edx:%eax ends as old value */
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_set_cx8)
+
+ENTRY(atomic64_xchg_cx8)
+	CFI_STARTPROC
+/* in: %esi = address, %ecx:%ebx = new value; out: %edx:%eax = old value */
+	movl %ebx, %eax
+	movl %ecx, %edx
+1:
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_xchg_cx8)
+
+.macro addsub_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+	SAVE esi
+	SAVE edi
+/* in: %edx:%eax = operand, %ecx = address; out: %edx:%eax = new value */
+	movl %eax, %esi
+	movl %edx, %edi
+	movl %ecx, %ebp
+/* load the current value, then retry until cmpxchg8b stores the result */
+	read64 %ebp
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l %esi, %ebx
+	\insc\()l %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE edi
+	RESTORE esi
+	RESTORE ebx
+	RESTORE ebp
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+addsub_return add add adc
+addsub_return sub sub sbb
+
+.macro incdec_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+/* in: %esi = address; out: %edx:%eax = new value; %ecx is not preserved */
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l $1, %ebx
+	\insc\()l $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+incdec_return inc add adc
+incdec_return dec sub sbb
+
+ENTRY(atomic64_dec_if_positive_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+/* in: %esi = address; out: %edx:%eax = old value - 1, stored only if >= 0 */
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	subl $1, %ebx
+	sbb $0, %ecx
+	js 2f
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+2:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_dec_if_positive_cx8)
+
+ENTRY(atomic64_add_unless_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+/* these just push these two parameters on the stack */
+	SAVE edi
+	SAVE esi
+/* in: %ecx = address, %edx:%eax = addend, %edi:%esi = "unless" value */
+	movl %ecx, %ebp
+	movl %eax, %esi
+	movl %edx, %edi
+/* 0(%esp) / 4(%esp) now hold the low / high half of the "unless" value */
+	read64 %ebp
+1:
+	cmpl %eax, 0(%esp)
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl %esi, %ebx
+	adcl %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+/* out: %eax = 1 if the add was done, 0 if the value equalled "unless" */
+	movl $1, %eax
+3:
+	addl $8, %esp
+	CFI_ADJUST_CFA_OFFSET -8
+	RESTORE ebx
+	RESTORE ebp
+	ret
+4:
+	cmpl %edx, 4(%esp)
+	jne 2b
+	xorl %eax, %eax
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_add_unless_cx8)
+
+ENTRY(atomic64_inc_not_zero_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+/* in: %esi = address; out: %eax = 1 if incremented, 0 if the value was 0 */
+	read64 %esi
+1:
+	testl %eax, %eax
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl $1, %ebx
+	adcl $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	movl $1, %eax
+3:
+	RESTORE ebx
+	ret
+4:
+	testl %edx, %edx
+	jne 2b
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c
index 2aa30a364bbe..d4efa7d679ff 100644
--- a/arch/unicore32/kernel/irq.c
+++ b/arch/unicore32/kernel/irq.c
@@ -23,7 +23,7 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 
 #include <asm/system.h>
@@ -237,7 +237,7 @@ static struct puv3_irq_state {
 	unsigned int	iccr;
 } puv3_irq_state;
 
-static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int puv3_irq_suspend(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -265,7 +265,7 @@ static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int puv3_irq_resume(struct sys_device *dev)
+static void puv3_irq_resume(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -278,27 +278,20 @@ static int puv3_irq_resume(struct sys_device *dev)
 
 		writel(st->icmr, INTC_ICMR);
 	}
-	return 0;
 }
 
-static struct sysdev_class puv3_irq_sysclass = {
-	.name		= "pkunity-irq",
+static struct syscore_ops puv3_irq_syscore_ops = {
 	.suspend	= puv3_irq_suspend,
 	.resume		= puv3_irq_resume,
 };
 
-static struct sys_device puv3_irq_device = {
-	.id		= 0,
-	.cls		= &puv3_irq_sysclass,
-};
-
-static int __init puv3_irq_init_devicefs(void)
+static int __init puv3_irq_init_syscore(void)
 {
-	sysdev_class_register(&puv3_irq_sysclass);
-	return sysdev_register(&puv3_irq_device);
+	register_syscore_ops(&puv3_irq_syscore_ops);
+	return 0;
 }
 
-device_initcall(puv3_irq_init_devicefs);
+device_initcall(puv3_irq_init_syscore);
 
 void __init init_IRQ(void)
 {
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 254e36fa9513..b9a26465e728 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread,
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on UniCore */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2096cf180648..880fcb6c86f4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -8,6 +8,7 @@ config 64BIT
 
 config X86_32
 	def_bool !64BIT
+	select CLKSRC_I8253
 
 config X86_64
 	def_bool 64BIT
@@ -71,7 +72,6 @@ config X86
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select ARCH_NO_SYSDEV_OPS
 	select HAVE_BPF_JIT if (X86_64 && NET)
 
 config INSTRUCTION_DECODER
@@ -113,7 +113,14 @@ config MMU
 	def_bool y
 
 config ZONE_DMA
-	def_bool y
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
 
 config SBUS
 	bool
@@ -366,17 +373,6 @@ config X86_UV
 # Following is an alphabetically sorted list of 32 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
 
-config X86_ELAN
-	bool "AMD Elan"
-	depends on X86_32
-	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select this for an AMD Elan processor.
-
-	  Do not use this option for K6/Athlon/Opteron processors!
-
-	  If unsure, choose "PC-compatible" instead.
-
 config X86_INTEL_CE
 	bool "CE4100 TV platform"
 	depends on PCI
@@ -691,6 +687,7 @@ config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
 	select PCI_MSI
+	select PCI_IOV
 	depends on X86_64 && PCI && ACPI
 	---help---
 	  With this option you can enable support for AMD IOMMU hardware in
@@ -1175,7 +1172,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on X86_64 && NUMA && PCI
+	depends on NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection.  You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
@@ -1202,7 +1199,7 @@ config NODES_SPAN_OTHER_NODES
 
 config NUMA_EMU
 	bool "NUMA emulation"
-	depends on X86_64 && NUMA
+	depends on NUMA
 	---help---
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -1224,6 +1221,10 @@ config HAVE_ARCH_BOOTMEM
 	def_bool y
 	depends on X86_32 && NUMA
 
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+	depends on X86_32 && NUMA
+
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
@@ -1232,13 +1233,9 @@ config NEED_NODE_MEMMAP_SIZE
 	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+	depends on X86_32 && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1248,20 +1245,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
-config ARCH_PROC_KCORE_TEXT
-	def_bool y
-	depends on X86_64 && PROC_KCORE
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on X86_64
-
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -1270,6 +1263,10 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ILLEGAL_POINTER_VALUE
        hex
        default 0 if X86_32
@@ -1704,10 +1701,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool X86_64
-	depends on NUMA
-
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA
@@ -1849,7 +1842,7 @@ config APM_ALLOW_INTS
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "drivers/cpufreq/Kconfig"
 
 source "drivers/cpuidle/Kconfig"
 
@@ -2077,7 +2070,7 @@ config OLPC
 	depends on !X86_PAE
 	select GPIOLIB
 	select OF
-	select OF_PROMTREE if PROC_DEVICETREE
+	select OF_PROMTREE
 	---help---
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index d161e939df62..6a7cfdf8ff69 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,6 +1,4 @@
 # Put here option for CPU selection and depending optimization
-if !X86_ELAN
-
 choice
 	prompt "Processor family"
 	default M686 if X86_32
@@ -203,6 +201,14 @@ config MWINCHIP3D
 	  stores for this CPU, which can increase performance of some
 	  operations.
 
+config MELAN
+	bool "AMD Elan"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
 config MGEODEGX1
 	bool "GeodeGX1"
 	depends on X86_32
@@ -292,8 +298,6 @@ config X86_GENERIC
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
-endif
-
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+	default "4" if MELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
 config X86_XADD
@@ -358,7 +362,7 @@ config X86_POPAD_OK
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index f2ee1abb1df9..86cee7b749e1 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,$(call cc-option,-march=
 	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
-cflags-$(CONFIG_X86_ELAN)	+= -march=i486
+cflags-$(CONFIG_MELAN)		+= -march=i486
 
 # Geode GX1 support
 cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035e..db75d07c3645 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 12e0e7dd869c..416d865eae39 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
 
 #define ARCH_HAS_POWER_INIT	1
 
-struct bootnode;
-
 #ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int x86_acpi_numa_init(void);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cce..94d420b360d1 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
 	.endm
 #endif
 
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.align 8
+	.quad \orig
+	.quad \alt
+	.word \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
 #endif  /*  __ASSEMBLY__  */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..bf535f947e8c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
-#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-#define IDEAL_NOP_SIZE_5 5
-extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
-extern void arch_init_ideal_nop5(void);
-#else
-static inline void arch_init_ideal_nop5(void) {}
-#endif
-
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a01..55d95eb789b3 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,13 +19,12 @@
 #ifndef _ASM_X86_AMD_IOMMU_PROTO_H
 #define _ASM_X86_AMD_IOMMU_PROTO_H
 
-struct amd_iommu;
+#include <asm/amd_iommu_types.h>
 
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_flush_all_domains(void);
-extern void amd_iommu_flush_all_devices(void);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
 	       (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
 }
 
+/* True iff the IOMMU_CAP_EFR capability bit is set and a bit of @f is set */
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+	bool has_efr = (iommu->cap & (1 << IOMMU_CAP_EFR)) != 0;
+
+	return has_efr && (iommu->features & f) != 0;
+}
+
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303bf..4c9982995414 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
 #define MMIO_CONTROL_OFFSET     0x0018
 #define MMIO_EXCL_BASE_OFFSET   0x0020
 #define MMIO_EXCL_LIMIT_OFFSET  0x0028
+#define MMIO_EXT_FEATURES	0x0030
 #define MMIO_CMD_HEAD_OFFSET	0x2000
 #define MMIO_CMD_TAIL_OFFSET	0x2008
 #define MMIO_EVT_HEAD_OFFSET	0x2010
 #define MMIO_EVT_TAIL_OFFSET	0x2018
 #define MMIO_STATUS_OFFSET	0x2020
 
+
+/* Extended Feature Bits */
+#define FEATURE_PREFETCH	(1ULL<<0)
+#define FEATURE_PPR		(1ULL<<1)
+#define FEATURE_X2APIC		(1ULL<<2)
+#define FEATURE_NX		(1ULL<<3)
+#define FEATURE_GT		(1ULL<<4)
+#define FEATURE_IA		(1ULL<<6)
+#define FEATURE_GA		(1ULL<<7)
+#define FEATURE_HE		(1ULL<<8)
+#define FEATURE_PC		(1ULL<<9)
+
 /* MMIO status bits */
 #define MMIO_STATUS_COM_WAIT_INT_MASK	0x04
 
@@ -113,7 +126,9 @@
 /* command specific defines */
 #define CMD_COMPL_WAIT          0x01
 #define CMD_INV_DEV_ENTRY       0x02
-#define CMD_INV_IOMMU_PAGES     0x03
+#define CMD_INV_IOMMU_PAGES	0x03
+#define CMD_INV_IOTLB_PAGES	0x04
+#define CMD_INV_ALL		0x08
 
 #define CMD_COMPL_WAIT_STORE_MASK	0x01
 #define CMD_COMPL_WAIT_INT_MASK		0x02
@@ -215,6 +230,8 @@
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+#define DTE_FLAG_IOTLB	0x01
+
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
@@ -227,6 +244,7 @@
 /* IOMMU capabilities */
 #define IOMMU_CAP_IOTLB   24
 #define IOMMU_CAP_NPCACHE 26
+#define IOMMU_CAP_EFR     27
 
 #define MAX_DOMAIN_ID 65536
 
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump;
 
 /* global flag if IOMMUs cache non-present entries */
 extern bool amd_iommu_np_cache;
+/* Only true if all IOMMUs support device IOTLBs */
+extern bool amd_iommu_iotlb_sup;
 
 /*
  * Make iterating over all IOMMUs easier
@@ -371,6 +391,9 @@ struct amd_iommu {
 	/* flags read from acpi table */
 	u8 acpi_flags;
 
+	/* Extended features */
+	u64 features;
+
 	/*
 	 * Capability pointer. There could be more than one IOMMU per PCI
 	 * device function if there are more than one AMD IOMMU capability
@@ -409,9 +432,6 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;
 
-	/* becomes true if a command buffer reset is running */
-	bool reset_in_progress;
-
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 331682231bb4..67f87f257611 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
 
 extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
-struct bootnode;
 
 extern bool early_is_amd_nb(u32 value);
 extern int amd_cache_northbridges(void);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2b7d573be549..a0c46f061210 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -363,7 +363,12 @@ struct apic {
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
 
-	/* determine CPU -> NUMA node mapping */
+	/*
+	 * Optional method called from setup_local_APIC() after logical
+	 * apicid is guaranteed to be known to initialize apicid -> node
+	 * mapping if NUMA initialization hasn't done so already.  Don't
+	 * add new users.
+	 */
 	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_x86_32_numa_cpu_node(int cpu);
-
 #endif
 
 static inline unsigned int
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define		APIC_DEST_LOGICAL	0x00800
 #define		APIC_DEST_PHYSICAL	0x00000
 #define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
 #define		APIC_DM_LOWEST		0x00100
 #define		APIC_DM_SMI		0x00200
 #define		APIC_DM_REMRD		0x00300
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 3c7521063d3f..aa6a3170ab5a 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -4,16 +4,40 @@
 #include <asm/io.h>
 
 /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E.
+ * Returns physical address of EBDA.  Returns 0 if there is no EBDA.
  */
 static inline unsigned int get_bios_ebda(void)
 {
+	/*
+	 * There is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E.
+	 */
 	unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
 	address <<= 4;
 	return address;	/* 0 means none */
 }
 
+/*
+ * Length of the EBDA in bytes, trimmed so that it does not extend beyond
+ * 640KiB.  Returns 0 when get_bios_ebda() reports that there is no EBDA.
+ */
+static inline unsigned int get_bios_ebda_length(void)
+{
+	unsigned int ebda = get_bios_ebda();
+	unsigned int kib, room;
+
+	if (ebda == 0)
+		return 0;
+
+	/* Byte 0 of the EBDA holds its size in KiB. */
+	kib = *(unsigned char *)phys_to_virt(ebda);
+
+	/* Room up to the 640KiB mark (wraps if the base lies above it). */
+	room = (640 * 1024) - ebda;
+
+	return min_t(unsigned int, room, kib << 10);
+}
+
 void reserve_ebda_region(void);
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..5dc6acc98dbd 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,8 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
@@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32];
 #define test_cpu_cap(c, bit)						\
 	 test_bit(bit, (unsigned long *)((c)->x86_capability))
 
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) &&					\
+#define REQUIRED_MASK_BIT_SET(bit)					\
 	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
 	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
 	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
@@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32];
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\
 	   (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )	\
-	  ? 1 :								\
+	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
 	 test_cpu_cap(c, bit))
 
+#define this_cpu_has(bit)						\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
+	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 057099e5faba..0bdb0c54d9a1 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -69,22 +69,18 @@
 
 #define MAX_DMA_CHANNELS	8
 
-#ifdef CONFIG_X86_32
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
-
-#else
-
 /* 16MB ISA DMA zone */
 #define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
 
 /* 4GB broken PCI/AGP hardware bus master zone */
 #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
+#ifdef CONFIG_X86_32
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
+#else
 /* Compat define for old dma zone */
 #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
 #endif
 
 /* 8237 DMA controllers */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a16508d4e..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern void efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..268c783ab1c0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * call mcount is "e8 <4 byte offset>"
-	 * The addr points to the 4 byte offset and the caller of this
-	 * function wants the pointer to e8. Simply subtract one.
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
 	 */
-	return addr - 1;
+	return addr;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 43085bfc99c3..156cd5d18d2a 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -66,7 +66,8 @@ static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
 	 * Don't enable translation but enable GART IO and CPU accesses.
-	 * Also, set DISTLBWALKPRB since GART tables memory is UC.
+	 * DISTLBWALKPRB (needed since GART tables memory is UC) is not set
+	 * here; enable_gart_translation() sets it along with GARTEN.
 	 */
-	ctl = DISTLBWALKPRB | order << 1;
+	ctl = order << 1;
 
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
@@ -75,17 +76,17 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
 	u32 tmp, ctl;
 
-        /* address of the mappings table */
-        addr >>= 12;
-        tmp = (u32) addr<<4;
-        tmp &= ~0xf;
-        pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
-
-        /* Enable GART translation for this hammer. */
-        pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-        ctl |= GARTEN;
-        ctl &= ~(DISGARTCPU | DISGARTIO);
-        pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+	/* address of the mappings table */
+	addr >>= 12;
+	tmp = (u32) addr<<4;
+	tmp &= ~0xf;
+	pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
+
+	/* Enable GART translation for this hammer. */
+	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
+	ctl |= GARTEN | DISTLBWALKPRB;
+	ctl &= ~(DISGARTCPU | DISGARTIO);
+	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
 
 static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h
index fc1f579fb965..65aaa91d5850 100644
--- a/arch/x86/include/asm/i8253.h
+++ b/arch/x86/include/asm/i8253.h
@@ -6,6 +6,8 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH	LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc50..a97a240f67f3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893a..a32b18ce6ead 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,25 @@
 
 #include <linux/types.h>
 #include <asm/nops.h>
+#include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-
-# define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:"					\
-			JUMP_LABEL_INITIAL_NOP			\
-			".pushsection __jump_table,  \"aw\" \n\t"\
-			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
-			".popsection \n\t"			\
-			: :  "i" (key) :  : label);		\
-	} while (0)
+#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:"
+		JUMP_LABEL_INITIAL_NOP
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index eb16e94ae04f..021979a6e23f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
-extern void (*x86_mce_decode_callback)(struct mce *m);
-
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 91df7c51806c..5e83a416eca8 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)	(node_data[nid])
 
 #include <asm/numaq.h>
-/* summit or generic arch */
-#include <asm/srat.h>
-
-extern int get_memcfg_numa_flat(void);
-/*
- * This allows any one NUMA architecture to be compiled
- * for, and still fall back to the flat function if it
- * fails.
- */
-static inline void get_memcfg_numa(void)
-{
-
-	if (get_memcfg_numaq())
-		return;
-	if (get_memcfg_from_srat())
-		return;
-	get_memcfg_numa_flat();
-}
 
 extern void resume_map_numa_kva(pgd_t *pgd);
 
 #else /* !CONFIG_NUMA */
 
-#define get_memcfg_numa get_memcfg_numa_flat
-
 static inline void resume_map_numa_kva(pgd_t *pgd) {}
 
 #endif /* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index 288b96f815a6..b3f88d7867c7 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -4,36 +4,13 @@
 #ifndef _ASM_X86_MMZONE_64_H
 #define _ASM_X86_MMZONE_64_H
 
-
 #ifdef CONFIG_NUMA
 
 #include <linux/mmdebug.h>
-
 #include <asm/smp.h>
 
-/* Simple perfect hash to map physical addresses to node numbers */
-struct memnode {
-	int shift;
-	unsigned int mapsize;
-	s16 *map;
-	s16 embedded_map[64 - 8];
-} ____cacheline_aligned; /* total size = 128 bytes */
-extern struct memnode memnode;
-#define memnode_shift memnode.shift
-#define memnodemap memnode.map
-#define memnodemapsize memnode.mapsize
-
 extern struct pglist_data *node_data[];
 
-static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
-{
-	unsigned nid;
-	VIRTUAL_BUG_ON(!memnodemap);
-	nid = memnodemap[addr >> memnode_shift];
-	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
-	return nid;
-}
-
 #define NODE_DATA(nid)		(node_data[nid])
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 67763c5d8b4e..9eae7752ae9b 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -35,7 +35,7 @@
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_X86_ELAN
+#elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
 #define MODULE_PROC_FAMILY "CRUSOE "
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fd5a1f365c95..3cce71413d0b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -96,11 +96,15 @@
 #define MSR_IA32_MC0_ADDR		0x00000402
 #define MSR_IA32_MC0_MISC		0x00000403
 
+#define MSR_AMD64_MC0_MASK		0xc0010044
+
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
 #define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))
 #define MSR_IA32_MCx_ADDR(x)		(MSR_IA32_MC0_ADDR + 4*(x))
 #define MSR_IA32_MCx_MISC(x)		(MSR_IA32_MC0_MISC + 4*(x))
 
+#define MSR_AMD64_MCx_MASK(x)		(MSR_AMD64_MC0_MASK + (x))
+
 /* These are consecutive and not in the normal 4er MCE bank block */
 #define MSR_IA32_MC0_CTL2		0x00000280
 #define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index af788496020b..405b4032a60b 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -1,7 +1,13 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
-/* Define nops for use with alternative() */
+/*
+ * Define nops for use with alternative() and for tracing.
+ *
+ * *_NOP5_ATOMIC must be a single instruction.
+ */
+
+#define NOP_DS_PREFIX 0x3e
 
 /* generic versions from gas
    1: nop
@@ -13,14 +19,15 @@
    6: leal 0x00000000(%esi),%esi
    7: leal 0x00000000(,%esi,1),%esi
 */
-#define GENERIC_NOP1 ".byte 0x90\n"
-#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
+#define GENERIC_NOP1 0x90
+#define GENERIC_NOP2 0x89,0xf6
+#define GENERIC_NOP3 0x8d,0x76,0x00
+#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
+#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
+#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
+#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
 
 /* Opteron 64bit nops
    1: nop
@@ -29,13 +36,14 @@
    4: osp osp osp nop
 */
 #define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2	".byte 0x66,0x90\n"
-#define K8_NOP3	".byte 0x66,0x66,0x90\n"
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5	K8_NOP3 K8_NOP2
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
+#define K8_NOP2	0x66,K8_NOP1
+#define K8_NOP3	0x66,K8_NOP2
+#define K8_NOP4	0x66,K8_NOP3
+#define K8_NOP5	K8_NOP3,K8_NOP2
+#define K8_NOP6	K8_NOP3,K8_NOP3
+#define K8_NOP7	K8_NOP4,K8_NOP3
+#define K8_NOP8	K8_NOP4,K8_NOP4
+#define K8_NOP5_ATOMIC 0x66,K8_NOP4
 
 /* K7 nops
    uses eax dependencies (arbitrary choice)
@@ -47,13 +55,14 @@
    7: leal 0x00000000(,%eax,1),%eax
 */
 #define K7_NOP1	GENERIC_NOP1
-#define K7_NOP2	".byte 0x8b,0xc0\n"
-#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5	K7_NOP4 ASM_NOP1
-#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7	".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8	K7_NOP7 ASM_NOP1
+#define K7_NOP2	0x8b,0xc0
+#define K7_NOP3	0x8d,0x04,0x20
+#define K7_NOP4	0x8d,0x44,0x20,0x00
+#define K7_NOP5	K7_NOP4,K7_NOP1
+#define K7_NOP6	0x8d,0x80,0,0,0,0
+#define K7_NOP7	0x8D,0x04,0x05,0,0,0,0
+#define K7_NOP8	K7_NOP7,K7_NOP1
+#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
 
 /* P6 nops
    uses eax dependencies (Intel-recommended choice)
@@ -69,52 +78,65 @@
 	There is kernel code that depends on this.
 */
 #define P6_NOP1	GENERIC_NOP1
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+#define P6_NOP2	0x66,0x90
+#define P6_NOP3	0x0f,0x1f,0x00
+#define P6_NOP4	0x0f,0x1f,0x40,0
+#define P6_NOP5	0x0f,0x1f,0x44,0x00,0
+#define P6_NOP6	0x66,0x0f,0x1f,0x44,0x00,0
+#define P6_NOP7	0x0f,0x1f,0x80,0,0,0,0
+#define P6_NOP8	0x0f,0x1f,0x84,0x00,0,0,0,0
+#define P6_NOP5_ATOMIC P6_NOP5
+
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
 
 #if defined(CONFIG_MK7)
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_P6_NOP)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_64)
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
 #else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
 #endif
 
 #define ASM_NOP_MAX 8
+#define NOP_ATOMIC5 (ASM_NOP_MAX+1)	/* Entry for the 5-byte atomic NOP */
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const *ideal_nops;
+extern void arch_init_ideal_nops(void);
+#endif
 
 #endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 3d4dab43c994..bfacd2ccf651 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,12 +1,24 @@
 #ifndef _ASM_X86_NUMA_H
 #define _ASM_X86_NUMA_H
 
+#include <linux/nodemask.h>
+
 #include <asm/topology.h>
 #include <asm/apicdef.h>
 
 #ifdef CONFIG_NUMA
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
+#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
+
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So don't recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
+extern int numa_off;
 
 /*
  * __apicid_to_node[] stores the raw mapping between physical apicid and
@@ -17,15 +29,27 @@
  * numa_cpu_node().
  */
 extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
 
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 	__apicid_to_node[apicid] = node;
 }
+
+extern int __cpuinit numa_cpu_node(int cpu);
+
 #else	/* CONFIG_NUMA */
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 }
+
+static inline int numa_cpu_node(int cpu)
+{
+	return NUMA_NO_NODE;
+}
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_X86_32
@@ -37,21 +61,25 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
-extern void __init numa_init_array(void);
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
-static inline void numa_init_array(void)		{ }
 static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+void debug_cpumask_set_cpu(int cpu, int node, bool enable);
 #endif
 
+#ifdef CONFIG_NUMA_EMU
+#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
+#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
+void numa_emu_cmdline(char *);
+#endif /* CONFIG_NUMA_EMU */
+
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index c6beed1ef103..e7d6b8254742 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -1,16 +1,6 @@
 #ifndef _ASM_X86_NUMA_32_H
 #define _ASM_X86_NUMA_32_H
 
-extern int numa_off;
-
-extern int pxm_to_nid(int pxm);
-
-#ifdef CONFIG_NUMA
-extern int __cpuinit numa_cpu_node(int cpu);
-#else	/* CONFIG_NUMA */
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-#endif	/* CONFIG_NUMA */
-
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 344eb1790b46..0c05f7ae46e8 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -1,42 +1,6 @@
 #ifndef _ASM_X86_NUMA_64_H
 #define _ASM_X86_NUMA_64_H
 
-#include <linux/nodemask.h>
-
-struct bootnode {
-	u64 start;
-	u64 end;
-};
-
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-
-extern int numa_off;
-
 extern unsigned long numa_free_all_bootmem(void);
-extern void setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end);
-
-#ifdef CONFIG_NUMA
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
-extern nodemask_t numa_nodes_parsed __initdata;
-
-extern int __cpuinit numa_cpu_node(int cpu);
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
-void numa_emu_cmdline(char *);
-#endif /* CONFIG_NUMA_EMU */
-#else
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-#endif
 
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 37c516545ec8..c3b3c322fd87 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -29,7 +29,7 @@
 #ifdef CONFIG_X86_NUMAQ
 
 extern int found_numaq;
-extern int get_memcfg_numaq(void);
+extern int numaq_numa_init(void);
 extern int pci_numaq_init(void);
 
 extern void *xquad_portio;
@@ -166,11 +166,6 @@ struct sys_cfg_data {
 
 void numaq_tsc_disable(void);
 
-#else
-static inline int get_memcfg_numaq(void)
-{
-	return 0;
-}
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* _ASM_X86_NUMAQ_H */
 
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index c5d3a5abbb9f..24487712e0b1 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void);
 /* check if OFW was detected during boot */
 extern bool olpc_ofw_present(void);
 
+extern void olpc_dt_build_devicetree(void);
+
 #else /* !CONFIG_OLPC */
 static inline void olpc_ofw_detect(void) { }
 static inline void setup_olpc_ofw_pgd(void) { }
-#endif /* !CONFIG_OLPC */
-
-#ifdef CONFIG_OF_PROMTREE
-extern void olpc_dt_build_devicetree(void);
-#else
 static inline void olpc_dt_build_devicetree(void) { }
-#endif
+#endif /* !CONFIG_OLPC */
 
 #endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b4398d8b..53278b0dfdf6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -517,7 +517,7 @@ do {									\
 	typeof(o2) __o2 = o2;						\
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
-	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
+	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4,	\
 		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
@@ -542,6 +542,33 @@ do {									\
 	old__;								\
 })
 
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+	int oldbit;	/* 0 when the bit is clear, -1 when it is set */
+
+	asm volatile("bt %2,"__percpu_arg(1)"\n\t" /* AT&T: bit index, base */
+			"sbb %0,%0"		/* spread CF over oldbit */
+			: "=r" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+#define x86_this_cpu_test_bit(nr, addr)			\
+	(__builtin_constant_p((nr))			\
+	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
+	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f32..d56187c6b838 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h
new file mode 100644
index 000000000000..4950a0b1d09c
--- /dev/null
+++ b/arch/x86/include/asm/probe_roms.h
@@ -0,0 +1,8 @@
+#ifndef _PROBE_ROMS_H_
+#define _PROBE_ROMS_H_
+struct pci_dev;
+
+extern void __iomem *pci_map_biosrom(struct pci_dev *pdev);
+extern void pci_unmap_biosrom(void __iomem *rom);
+extern size_t pci_biosrom_size(struct pci_dev *pdev);
+#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index a898a2b6e10c..59ab4dffa377 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -60,6 +60,7 @@
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
+#define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
 
 /*
  * x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a2..9756551ec760 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard.text) __used			\
+	static void __section(.discard.text) __used notrace		\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align);
 	type *name;					\
 	RESERVE_BRK(name, sizeof(type) * entries)
 
+extern void probe_roms(void);
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
-extern void probe_roms(void);
 
 #else
 void __init x86_64_start_kernel(char *real_mode);
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h
deleted file mode 100644
index b508d639d1a7..000000000000
--- a/arch/x86/include/asm/srat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-
-#ifndef _ASM_X86_SRAT_H
-#define _ASM_X86_SRAT_H
-
-#ifdef CONFIG_ACPI_NUMA
-extern int get_memcfg_from_srat(void);
-#else
-static inline int get_memcfg_from_srat(void)
-{
-	return 0;
-}
-#endif
-
-#endif /* _ASM_X86_SRAT_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index d7e89c83645d..70bbe39043a9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
-	void (*warning)(void *data, char *msg);
-	/* msg must contain %s for the symbol */
-	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
 	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 12569e691ce3..c2ff2a1d845e 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void)
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define read_cr0()	(native_read_cr0())
-#define write_cr0(x)	(native_write_cr0(x))
-#define read_cr2()	(native_read_cr2())
-#define write_cr2(x)	(native_write_cr2(x))
-#define read_cr3()	(native_read_cr3())
-#define write_cr3(x)	(native_write_cr3(x))
-#define read_cr4()	(native_read_cr4())
-#define read_cr4_safe()	(native_read_cr4_safe())
-#define write_cr4(x)	(native_write_cr4(x))
-#define wbinvd()	(native_wbinvd())
+
+static inline unsigned long read_cr0(void)
+{
+	return native_read_cr0();
+}
+
+static inline void write_cr0(unsigned long x)
+{
+	native_write_cr0(x);
+}
+
+static inline unsigned long read_cr2(void)
+{
+	return native_read_cr2();
+}
+
+static inline void write_cr2(unsigned long x)
+{
+	native_write_cr2(x);
+}
+
+static inline unsigned long read_cr3(void)
+{
+	return native_read_cr3();
+}
+
+static inline void write_cr3(unsigned long x)
+{
+	native_write_cr3(x);
+}
+
+static inline unsigned long read_cr4(void)
+{
+	return native_read_cr4();
+}
+
+static inline unsigned long read_cr4_safe(void)
+{
+	return native_read_cr4_safe();
+}
+
+static inline void write_cr4(unsigned long x)
+{
+	native_write_cr4(x);
+}
+
+static inline void wbinvd(void)
+{
+	native_wbinvd();
+}
+
 #ifdef CONFIG_X86_64
-#define read_cr8()	(native_read_cr8())
-#define write_cr8(x)	(native_write_cr8(x))
-#define load_gs_index   native_load_gs_index
+
+static inline unsigned long read_cr8(void)
+{
+	return native_read_cr8();
+}
+
+static inline void write_cr8(unsigned long x)
+{
+	native_write_cr8(x);
+}
+
+static inline void load_gs_index(unsigned selector)
+{
+	native_load_gs_index(selector);
+}
+
 #endif
 
 /* Clear the 'TS' bit */
-#define clts()		(native_clts())
+static inline void clts(void)
+{
+	native_clts();
+}
 
 #endif/* CONFIG_PARAVIRT */
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 910a7084f7f2..c00692476e9f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void);
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #ifdef CONFIG_X86_32
-extern unsigned long node_start_pfn[];
-extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
-#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
-
 # define SD_CACHE_NICE_TRIES	1
 # define SD_IDLE_IDX		1
-
 #else
-
 # define SD_CACHE_NICE_TRIES	2
 # define SD_IDLE_IDX		2
-
 #endif
 
 /* sched_domains SD_NODE_INIT for NUMA machines */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762a..99ddd148a760 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -6,7 +6,6 @@
 #include <linux/errno.h>
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
@@ -42,7 +41,7 @@
  * Returns 0 if the range is valid, nonzero otherwise.
  *
  * This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
  *
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 088d09fb1615..566e803cc602 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -6,7 +6,6 @@
  */
 #include <linux/errno.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 316708d5af92..1c66d30971ad 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -6,7 +6,6 @@
  */
 #include <linux/compiler.h>
 #include <linux/errno.h>
-#include <linux/prefetch.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
  */
@@ -124,7 +126,7 @@
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short uvhub;
+	short pnode;
+};
 /*
  * one per-cpu; to locate the software tables
  */
@@ -399,10 +405,12 @@ struct bau_control {
 	int baudisabled;
 	int set_bau_off;
 	short cpu;
+	short osnode;
 	short uvhub_cpu;
 	short uvhub;
 	short cpus_in_socket;
 	short cpus_in_uvhub;
+	short partition_base_pnode;
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
 	int congested_period;
 	cycles_t period_time;
 	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
 				    int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad8..d3d859035af9 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
 };
 
 /**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_setup_start:	platform specific pre paging_init() call
  * @pagetable_setup_done:	platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
 	struct x86_init_mpparse		mpparse;
 	struct x86_init_irqs		irqs;
 	struct x86_init_oem		oem;
+	struct x86_init_mapping		mapping;
 	struct x86_init_paging		paging;
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c61934fbf22a..64a619d47d34 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
 
-extern int m2p_add_override(unsigned long mfn, struct page *page);
-extern int m2p_remove_override(struct page *page);
+extern int m2p_add_override(unsigned long mfn, struct page *page,
+			    bool clear_pte);
+extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index aa8620989162..4fbda9a3f339 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
 #endif
 #if defined(CONFIG_XEN_DOM0)
 void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
 #else
 static inline void __init xen_setup_pirqs(void)
 {
 }
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+						   uint16_t domain)
+{
+	return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
 #endif
 
 #if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7338ef2218bc..250806472a7e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
-obj-$(CONFIG_X86_32)	+= probe_roms_32.o
+obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF)			+= devicetree.o
 ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
-	obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
+	obj-$(CONFIG_GART_IOMMU)	+= amd_gart_64.o aperture_64.o
 	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 	obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ff93bc1b09c3..18a857ba7a25 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
-		if (strncmp(str, "s4_nonvs", 8) == 0) {
-			pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
-					"please use acpi_sleep=nonvs instead");
-			acpi_nvs_nosave();
-		}
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..a81f2d52f869 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #define DPRINTK(fmt, args...) if (debug_alternative) \
 	printk(KERN_DEBUG fmt, args)
 
+/*
+ * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
+ * that correspond to that nop. Getting from one nop to the next, we
+ * add to the array the offset that is equal to the sum of all sizes of
+ * nops preceding the one we are after.
+ *
+ * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
+ * nice symmetry of sizes of the previous nops.
+ */
 #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
-/* Use inline assembly to define this because the nops are defined
-   as inline assembly strings in the include files and we cannot
-   get them easily into strings. */
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
-	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-	GENERIC_NOP7 GENERIC_NOP8
-    "\t.previous");
-extern const unsigned char intelnops[];
-static const unsigned char *const __initconst_or_module
-intel_nops[ASM_NOP_MAX+1] = {
+static const unsigned char intelnops[] =
+{
+	GENERIC_NOP1,
+	GENERIC_NOP2,
+	GENERIC_NOP3,
+	GENERIC_NOP4,
+	GENERIC_NOP5,
+	GENERIC_NOP6,
+	GENERIC_NOP7,
+	GENERIC_NOP8,
+	GENERIC_NOP5_ATOMIC
+};
+static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	intelnops,
 	intelnops + 1,
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = {
 	intelnops + 1 + 2 + 3 + 4 + 5,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef K8_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
-	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-	K8_NOP7 K8_NOP8
-    "\t.previous");
-extern const unsigned char k8nops[];
-static const unsigned char *const __initconst_or_module
-k8_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k8nops[] =
+{
+	K8_NOP1,
+	K8_NOP2,
+	K8_NOP3,
+	K8_NOP4,
+	K8_NOP5,
+	K8_NOP6,
+	K8_NOP7,
+	K8_NOP8,
+	K8_NOP5_ATOMIC
+};
+static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k8nops,
 	k8nops + 1,
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = {
 	k8nops + 1 + 2 + 3 + 4 + 5,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #if defined(K7_NOP1) && !defined(CONFIG_X86_64)
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
-	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-	K7_NOP7 K7_NOP8
-    "\t.previous");
-extern const unsigned char k7nops[];
-static const unsigned char *const __initconst_or_module
-k7_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k7nops[] =
+{
+	K7_NOP1,
+	K7_NOP2,
+	K7_NOP3,
+	K7_NOP4,
+	K7_NOP5,
+	K7_NOP6,
+	K7_NOP7,
+	K7_NOP8,
+	K7_NOP5_ATOMIC
+};
+static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k7nops,
 	k7nops + 1,
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = {
 	k7nops + 1 + 2 + 3 + 4 + 5,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef P6_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
-	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
-	P6_NOP7 P6_NOP8
-    "\t.previous");
-extern const unsigned char p6nops[];
-static const unsigned char *const __initconst_or_module
-p6_nops[ASM_NOP_MAX+1] = {
+static const unsigned char  __initconst_or_module p6nops[] =
+{
+	P6_NOP1,
+	P6_NOP2,
+	P6_NOP3,
+	P6_NOP4,
+	P6_NOP5,
+	P6_NOP6,
+	P6_NOP7,
+	P6_NOP8,
+	P6_NOP5_ATOMIC
+};
+static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	p6nops,
 	p6nops + 1,
@@ -147,47 +184,71 @@ p6_nops[ASM_NOP_MAX+1] = {
 	p6nops + 1 + 2 + 3 + 4 + 5,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
+/* Initialize these to a safe default */
 #ifdef CONFIG_X86_64
+const unsigned char * const *ideal_nops = p6_nops;
+#else
+const unsigned char * const *ideal_nops = intel_nops;
+#endif
 
-extern char __vsyscall_0;
-static const unsigned char *const *__init_or_module find_nop_table(void)
+/*
+ * Pick the NOP table matching the boot CPU's vendor/family/model and
+ * store it in ideal_nops, replacing the compile-time default above.
+ */
+void __init arch_init_ideal_nops(void)
 {
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return k8_nops;
-}
-
-#else /* CONFIG_X86_64 */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		/*
+		 * Due to a decoder implementation quirk, some
+		 * specific Intel CPUs actually perform better with
+		 * the "k8_nops" than with the SDM-recommended NOPs.
+		 */
+		if (boot_cpu_data.x86 == 6 &&
+		    boot_cpu_data.x86_model >= 0x0f &&
+		    boot_cpu_data.x86_model != 0x1c &&
+		    boot_cpu_data.x86_model != 0x26 &&
+		    boot_cpu_data.x86_model != 0x27 &&
+		    boot_cpu_data.x86_model < 0x30) {
+			ideal_nops = k8_nops;
+		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
+			ideal_nops = p6_nops;
+		} else {
+#ifdef CONFIG_X86_64
+			ideal_nops = k8_nops;
+#else
+			ideal_nops = intel_nops;
+#endif
+		}
+		/* Done for Intel: must not fall through to the defaults. */
+		break;
 
-static const unsigned char *const *__init_or_module find_nop_table(void)
-{
-	if (boot_cpu_has(X86_FEATURE_K8))
-		return k8_nops;
-	else if (boot_cpu_has(X86_FEATURE_K7))
-		return k7_nops;
-	else if (boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return intel_nops;
+	default:
+#ifdef CONFIG_X86_64
+		ideal_nops = k8_nops;
+#else
+		if (boot_cpu_has(X86_FEATURE_K8))
+			ideal_nops = k8_nops;
+		else if (boot_cpu_has(X86_FEATURE_K7))
+			ideal_nops = k7_nops;
+		else
+			ideal_nops = intel_nops;
+#endif
+	}
 }
 
-#endif /* CONFIG_X86_64 */
-
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
 static void __init_or_module add_nops(void *insns, unsigned int len)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
 	while (len > 0) {
 		unsigned int noplen = len;
 		if (noplen > ASM_NOP_MAX)
 			noplen = ASM_NOP_MAX;
-		memcpy(insns, noptable[noplen], noplen);
+		memcpy(insns, ideal_nops[noplen], noplen);
 		insns += noplen;
 		len -= noplen;
 	}
@@ -195,6 +256,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
+extern char __vsyscall_0;
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 /* Replace instructions with better alternatives for this CPU type.
@@ -210,6 +272,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	u8 insnbuf[MAX_PATCH_LEN];
 
 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite a previous scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
+	 */
 	for (a = start; a < end; a++) {
 		u8 *instr = a->instr;
 		BUG_ON(a->replacementlen > a->instrlen);
@@ -678,29 +749,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 	wrote_text = 0;
 	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }
-
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-
-#ifdef CONFIG_X86_64
-unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
-#else
-unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
-#endif
-
-void __init arch_init_ideal_nop5(void)
-{
-	/*
-	 * There is no good nop for all x86 archs.  This selection
-	 * algorithm should be unified with the one in find_nop_table(),
-	 * but this should be good enough for now.
-	 *
-	 * For cases other than the ones below, use the safe (as in
-	 * always functional) defaults above.
-	 */
-#ifdef CONFIG_X86_64
-	/* Don't use these on 32 bits due to broken virtualizers */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		memcpy(ideal_nop5, p6_nops[5], 5);
-#endif
-}
-#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 82ada01625b9..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -81,6 +81,9 @@ static u32 gart_unmapped_entry;
 #define AGPEXTERN
 #endif
 
+/* GART can only remap to physical addresses < 1TB */
+#define GART_MAX_PHYS_ADDR	(1ULL << 40)
+
 /* backdoor interface to AGP driver */
 AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
@@ -212,9 +215,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
-	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
+	unsigned long iommu_page;
 	int i;
 
+	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
+		return bad_dma_addr;
+
+	iommu_page = alloc_iommu(dev, npages, align_mask);
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
 			return phys_mem;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca77787220..873e7e1ead7b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -25,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu.h>
+#include <linux/delay.h>
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -34,7 +36,7 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define EXIT_LOOP_COUNT 10000000
+#define LOOP_TIMEOUT	100000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
@@ -57,7 +59,6 @@ struct iommu_cmd {
 	u32 data[4];
 };
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu);
 static void update_domain(struct protection_domain *domain);
 
 /****************************************************************************
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		break;
 	case EVENT_TYPE_ILL_CMD:
 		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
-		iommu->reset_in_progress = true;
-		reset_iommu_command_buffer(iommu);
 		dump_command(address);
 		break;
 	case EVENT_TYPE_CMD_HARD_ERR:
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-irqreturn_t amd_iommu_int_handler(int irq, void *data)
+irqreturn_t amd_iommu_int_thread(int irq, void *data)
 {
 	struct amd_iommu *iommu;
 
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t amd_iommu_int_handler(int irq, void *data)
+{
+	return IRQ_WAKE_THREAD;
+}
+
 /****************************************************************************
  *
  * IOMMU command queuing functions
  *
  ****************************************************************************/
 
-/*
- * Writes the command to the IOMMUs command buffer and informs the
- * hardware about the new command. Must be called with iommu->lock held.
- */
-static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int wait_on_sem(volatile u64 *sem)
+{
+	int i = 0;
+
+	while (*sem == 0 && i < LOOP_TIMEOUT) {
+		udelay(1);
+		i += 1;
+	}
+
+	if (i == LOOP_TIMEOUT) {
+		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void copy_cmd_to_buffer(struct amd_iommu *iommu,
+			       struct iommu_cmd *cmd,
+			       u32 tail)
 {
-	u32 tail, head;
 	u8 *target;
 
-	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
-	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 	target = iommu->cmd_buf + tail;
-	memcpy_toio(target, cmd, sizeof(*cmd));
-	tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
-	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	if (tail == head)
-		return -ENOMEM;
+	tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+
+	/* Copy command to buffer */
+	memcpy(target, cmd, sizeof(*cmd));
+
+	/* Tell the IOMMU about it */
 	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+}
 
-	return 0;
+static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+{
+	WARN_ON(address & 0x7ULL);
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
+	cmd->data[1] = upper_32_bits(__pa(address));
+	cmd->data[2] = 1;
+	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
+}
+
+static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = devid;
+	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
+}
+
+static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+				  size_t size, u16 domid, int pde)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[1] |= domid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
+static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
+				  u64 address, size_t size)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * IOTLB entries of this device
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0]  = devid;
+	cmd->data[0] |= (qdep & 0xff) << 24;
+	cmd->data[1]  = devid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+	if (s)
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
+
+static void build_inv_all(struct iommu_cmd *cmd)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	CMD_SET_TYPE(cmd, CMD_INV_ALL);
 }
 
 /*
- * General queuing function for commands. Takes iommu->lock and calls
- * __iommu_queue_command().
+ * Writes the command to the IOMMUs command buffer and informs the
+ * hardware about the new command.
  */
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+	u32 left, tail, head, next_tail;
 	unsigned long flags;
-	int ret;
 
+	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+
+again:
 	spin_lock_irqsave(&iommu->lock, flags);
-	ret = __iommu_queue_command(iommu, cmd);
-	if (!ret)
-		iommu->need_sync = true;
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return ret;
-}
+	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+	left      = (head - next_tail) % iommu->cmd_buf_size;
 
-/*
- * This function waits until an IOMMU has completed a completion
- * wait command
- */
-static void __iommu_wait_for_completion(struct amd_iommu *iommu)
-{
-	int ready = 0;
-	unsigned status = 0;
-	unsigned long i = 0;
+	if (left <= 2) {
+		struct iommu_cmd sync_cmd;
+		volatile u64 sem = 0;
+		int ret;
 
-	INC_STATS_COUNTER(compl_wait);
+		build_completion_wait(&sync_cmd, (u64)&sem);
+		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+		spin_unlock_irqrestore(&iommu->lock, flags);
+
+		if ((ret = wait_on_sem(&sem)) != 0)
+			return ret;
+
+		goto again;
 	}
 
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+	copy_cmd_to_buffer(iommu, cmd, tail);
+
+	/* We need to sync now to make sure all commands are processed */
+	iommu->need_sync = true;
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		iommu->reset_in_progress = true;
+	return 0;
 }
 
 /*
  * This function queues a completion wait command into the command
  * buffer of an IOMMU
  */
-static int __iommu_completion_wait(struct amd_iommu *iommu)
+static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
+	volatile u64 sem = 0;
+	int ret;
 
-	 memset(&cmd, 0, sizeof(cmd));
-	 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	if (!iommu->need_sync)
+		return 0;
 
-	 return __iommu_queue_command(iommu, &cmd);
+	build_completion_wait(&cmd, (u64)&sem);
+
+	ret = iommu_queue_command(iommu, &cmd);
+	if (ret)
+		return ret;
+
+	return wait_on_sem(&sem);
 }
 
-/*
- * This function is called whenever we need to ensure that the IOMMU has
- * completed execution of all commands we sent. It sends a
- * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
- * us about that by writing a value to a physical address we pass with
- * the command.
- */
-static int iommu_completion_wait(struct amd_iommu *iommu)
+static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
 {
-	int ret = 0;
-	unsigned long flags;
+	struct iommu_cmd cmd;
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	build_inv_dte(&cmd, devid);
 
-	if (!iommu->need_sync)
-		goto out;
+	return iommu_queue_command(iommu, &cmd);
+}
 
-	ret = __iommu_completion_wait(iommu);
+static void iommu_flush_dte_all(struct amd_iommu *iommu)
+{
+	u32 devid;
 
-	iommu->need_sync = false;
+	for (devid = 0; devid <= 0xffff; ++devid)
+		iommu_flush_dte(iommu, devid);
 
-	if (ret)
-		goto out;
-
-	__iommu_wait_for_completion(iommu);
+	iommu_completion_wait(iommu);
+}
 
-out:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+/*
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
+ */
+static void iommu_flush_tlb_all(struct amd_iommu *iommu)
+{
+	u32 dom_id;
 
-	if (iommu->reset_in_progress)
-		reset_iommu_command_buffer(iommu);
+	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
+		struct iommu_cmd cmd;
+		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      dom_id, 1);
+		iommu_queue_command(iommu, &cmd);
+	}
 
-	return 0;
+	iommu_completion_wait(iommu);
 }
 
-static void iommu_flush_complete(struct protection_domain *domain)
+static void iommu_flush_all(struct amd_iommu *iommu)
 {
-	int i;
+	struct iommu_cmd cmd;
 
-	for (i = 0; i < amd_iommus_present; ++i) {
-		if (!domain->dev_iommu[i])
-			continue;
+	build_inv_all(&cmd);
 
-		/*
-		 * Devices of this domain are behind this IOMMU
-		 * We need to wait for completion of all commands.
-		 */
-		iommu_completion_wait(amd_iommus[i]);
+	iommu_queue_command(iommu, &cmd);
+	iommu_completion_wait(iommu);
+}
+
+void iommu_flush_all_caches(struct amd_iommu *iommu)
+{
+	if (iommu_feature(iommu, FEATURE_IA)) {
+		iommu_flush_all(iommu);
+	} else {
+		iommu_flush_dte_all(iommu);
+		iommu_flush_tlb_all(iommu);
 	}
 }
 
 /*
- * Command send function for invalidating a device table entry
+ * Command send function for flushing on-device TLB
  */
-static int iommu_flush_device(struct device *dev)
+static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct amd_iommu *iommu;
 	struct iommu_cmd cmd;
 	u16 devid;
+	int qdep;
 
+	qdep  = pci_ats_queue_depth(pdev);
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
 
-	/* Build command */
-	memset(&cmd, 0, sizeof(cmd));
-	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
-	cmd.data[0] = devid;
+	build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
 
 	return iommu_queue_command(iommu, &cmd);
 }
 
-static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
-					  u16 domid, int pde, int s)
-{
-	memset(cmd, 0, sizeof(*cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
-	cmd->data[1] |= domid;
-	cmd->data[2] = lower_32_bits(address);
-	cmd->data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
-}
-
 /*
- * Generic command send function for invalidaing TLB entries
+ * Command send function for invalidating a device table entry
  */
-static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
-		u64 address, u16 domid, int pde, int s)
+static int device_flush_dte(struct device *dev)
 {
-	struct iommu_cmd cmd;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
 	int ret;
 
-	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
+	pdev  = to_pci_dev(dev);
+	devid = get_device_id(dev);
+	iommu = amd_iommu_rlookup_table[devid];
 
-	ret = iommu_queue_command(iommu, &cmd);
+	ret = iommu_flush_dte(iommu, devid);
+	if (ret)
+		return ret;
+
+	if (pci_ats_enabled(pdev))
+		ret = device_flush_iotlb(dev, 0, ~0UL);
 
 	return ret;
 }
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
  * It invalidates a single PTE if the range to flush is within a single
  * page. Otherwise it flushes the whole TLB of the IOMMU.
  */
-static void __iommu_flush_pages(struct protection_domain *domain,
-				u64 address, size_t size, int pde)
+static void __domain_flush_pages(struct protection_domain *domain,
+				 u64 address, size_t size, int pde)
 {
-	int s = 0, i;
-	unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
-
-	address &= PAGE_MASK;
-
-	if (pages > 1) {
-		/*
-		 * If we have to flush more than one page, flush all
-		 * TLB entries for this domain
-		 */
-		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-		s = 1;
-	}
+	struct iommu_dev_data *dev_data;
+	struct iommu_cmd cmd;
+	int ret = 0, i;
 
+	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
 	for (i = 0; i < amd_iommus_present; ++i) {
 		if (!domain->dev_iommu[i])
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain,
 		 * Devices of this domain are behind this IOMMU
 		 * We need a TLB flush
 		 */
-		iommu_queue_inv_iommu_pages(amd_iommus[i], address,
-					    domain->id, pde, s);
+		ret |= iommu_queue_command(amd_iommus[i], &cmd);
+	}
+
+	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
+
+		if (!pci_ats_enabled(pdev))
+			continue;
+
+		ret |= device_flush_iotlb(dev_data->dev, address, size);
 	}
 
-	return;
+	WARN_ON(ret);
 }
 
-static void iommu_flush_pages(struct protection_domain *domain,
-			     u64 address, size_t size)
+static void domain_flush_pages(struct protection_domain *domain,
+			       u64 address, size_t size)
 {
-	__iommu_flush_pages(domain, address, size, 0);
+	__domain_flush_pages(domain, address, size, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct protection_domain *domain)
+static void domain_flush_tlb(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct protection_domain *domain)
+static void domain_flush_tlb_pde(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
-}
-
-
-/*
- * This function flushes the DTEs for all devices in domain
- */
-static void iommu_flush_domain_devices(struct protection_domain *domain)
-{
-	struct iommu_dev_data *dev_data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&domain->lock, flags);
-
-	list_for_each_entry(dev_data, &domain->dev_list, list)
-		iommu_flush_device(dev_data->dev);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void iommu_flush_all_domain_devices(void)
+static void domain_flush_complete(struct protection_domain *domain)
 {
-	struct protection_domain *domain;
-	unsigned long flags;
+	int i;
 
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+	for (i = 0; i < amd_iommus_present; ++i) {
+		if (!domain->dev_iommu[i])
+			continue;
 
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		iommu_flush_domain_devices(domain);
-		iommu_flush_complete(domain);
+		/*
+		 * Devices of this domain are behind this IOMMU
+		 * We need to wait for completion of all commands.
+		 */
+		iommu_completion_wait(amd_iommus[i]);
 	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 }
 
-void amd_iommu_flush_all_devices(void)
-{
-	iommu_flush_all_domain_devices();
-}
 
 /*
- * This function uses heavy locking and may disable irqs for some time. But
- * this is no issue because it is only called during resume.
+ * This function flushes the DTEs for all devices in domain
  */
-void amd_iommu_flush_all_domains(void)
+static void domain_flush_devices(struct protection_domain *domain)
 {
-	struct protection_domain *domain;
+	struct iommu_dev_data *dev_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		spin_lock(&domain->lock);
-		iommu_flush_tlb_pde(domain);
-		iommu_flush_complete(domain);
-		spin_unlock(&domain->lock);
-	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
-{
-	pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
-
-	if (iommu->reset_in_progress)
-		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+	spin_lock_irqsave(&domain->lock, flags);
 
-	amd_iommu_reset_cmd_buffer(iommu);
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
+	list_for_each_entry(dev_data, &domain->dev_list, list)
+		device_flush_dte(dev_data->dev);
 
-	iommu->reset_in_progress = false;
+	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /****************************************************************************
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
 	return domain->flags & PD_DMA_OPS_MASK;
 }
 
-static void set_dte_entry(u16 devid, struct protection_domain *domain)
+static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
 	u64 pte_root = virt_to_phys(domain->pt_root);
+	u32 flags = 0;
 
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
-	amd_iommu_dev_table[devid].data[2] = domain->id;
-	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
-	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+	if (ats)
+		flags |= DTE_FLAG_IOTLB;
+
+	amd_iommu_dev_table[devid].data[3] |= flags;
+	amd_iommu_dev_table[devid].data[2]  = domain->id;
+	amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
+	amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
 }
 
 static void clear_dte_entry(u16 devid)
@@ -1437,34 +1509,42 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	bool ats = false;
 	u16 devid;
 
 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
+
+	if (amd_iommu_iotlb_sup)
+		ats = pci_ats_enabled(pdev);
 
 	/* Update data structures */
 	dev_data->domain = domain;
 	list_add(&dev_data->list, &domain->dev_list);
-	set_dte_entry(devid, domain);
+	set_dte_entry(devid, domain, ats);
 
 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
 	domain->dev_cnt                 += 1;
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 static void do_detach(struct device *dev)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
 	u16 devid;
 
 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
 
 	/* decrease reference counters */
 	dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1476,7 +1556,7 @@ static void do_detach(struct device *dev)
 	clear_dte_entry(devid);
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 /*
@@ -1539,9 +1619,13 @@ out_unlock:
 static int attach_device(struct device *dev,
 			 struct protection_domain *domain)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 	int ret;
 
+	if (amd_iommu_iotlb_sup)
+		pci_enable_ats(pdev, PAGE_SHIFT);
+
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev, domain);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1551,7 +1635,7 @@ static int attach_device(struct device *dev,
 	 * left the caches in the IOMMU dirty. So we have to flush
 	 * here to evict all dirty stuff.
 	 */
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return ret;
 }
@@ -1598,12 +1682,16 @@ static void __detach_device(struct device *dev)
  */
 static void detach_device(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 
 	/* lock device table */
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+	if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
+		pci_disable_ats(pdev);
 }
 
 /*
@@ -1692,7 +1780,7 @@ static int device_change_notifier(struct notifier_block *nb,
 		goto out;
 	}
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 
 out:
@@ -1753,8 +1841,9 @@ static void update_device_table(struct protection_domain *domain)
 	struct iommu_dev_data *dev_data;
 
 	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
 		u16 devid = get_device_id(dev_data->dev);
-		set_dte_entry(devid, domain);
+		set_dte_entry(devid, domain, pci_ats_enabled(pdev));
 	}
 }
 
@@ -1764,8 +1853,9 @@ static void update_domain(struct protection_domain *domain)
 		return;
 
 	update_device_table(domain);
-	iommu_flush_domain_devices(domain);
-	iommu_flush_tlb_pde(domain);
+
+	domain_flush_devices(domain);
+	domain_flush_tlb_pde(domain);
 
 	domain->updated = false;
 }
@@ -1924,10 +2014,10 @@ retry:
 	ADD_STATS_COUNTER(alloced_io_mem, size);
 
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-		iommu_flush_tlb(&dma_dom->domain);
+		domain_flush_tlb(&dma_dom->domain);
 		dma_dom->need_flush = false;
 	} else if (unlikely(amd_iommu_np_cache))
-		iommu_flush_pages(&dma_dom->domain, address, size);
+		domain_flush_pages(&dma_dom->domain, address, size);
 
 out:
 	return address;
@@ -1976,7 +2066,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-		iommu_flush_pages(&dma_dom->domain, flush_addr, size);
+		domain_flush_pages(&dma_dom->domain, flush_addr, size);
 		dma_dom->need_flush = false;
 	}
 }
@@ -2012,7 +2102,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 	if (addr == DMA_ERROR_CODE)
 		goto out;
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -2039,7 +2129,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, dir);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2104,7 +2194,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -2150,7 +2240,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		s->dma_address = s->dma_length = 0;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2200,7 +2290,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		goto out_free;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2232,7 +2322,7 @@ static void free_coherent(struct device *dev, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2476,7 +2566,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 	if (!iommu)
 		return;
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 }
 
@@ -2542,7 +2632,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
 	mutex_unlock(&domain->api_lock);
 
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return get_order(unmap_size);
 }
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 246d727b65b7..9179c21120a8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
+bool amd_iommu_iotlb_sup __read_mostly = true;
 
 /*
  * The ACPI table parsing functions set this variable on an error
@@ -180,6 +181,12 @@ static u32 dev_table_size;	/* size of the device table */
 static u32 alias_table_size;	/* size of the alias table */
 static u32 rlookup_table_size;	/* size if the rlookup table */
 
+/*
+ * This function flushes all internal caches of
+ * the IOMMU used by this driver.
+ */
+extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 static void iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
+	static const char * const feat_str[] = {
+		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
+		"IA", "GA", "HE", "PC", NULL
+	};
+	int i;
+
+	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
 	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
+	if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
+		printk(KERN_CONT " extended features: ");
+		for (i = 0; feat_str[i]; ++i)
+			if (iommu_feature(iommu, (1ULL << i)))
+				printk(KERN_CONT " %s", feat_str[i]);
+	}
+	printk(KERN_CONT "\n");
+
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
 	int cap_ptr = iommu->cap_ptr;
-	u32 range, misc;
+	u32 range, misc, low, high;
 	int i, j;
 
 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
 
+	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
+		amd_iommu_iotlb_sup = false;
+
+	/* read extended feature bits */
+	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
+	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
+
+	iommu->features = ((u64)high << 32) | low;
+
 	if (!is_rd890_iommu(iommu->dev))
 		return;
 
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 	if (pci_enable_msi(iommu->dev))
 		return 1;
 
-	r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
-			IRQF_SAMPLE_RANDOM,
-			"AMD-Vi",
-			NULL);
+	r = request_threaded_irq(iommu->dev->irq,
+				 amd_iommu_int_handler,
+				 amd_iommu_int_thread,
+				 0, "AMD-Vi",
+				 iommu->dev);
 
 	if (r) {
 		pci_disable_msi(iommu->dev);
@@ -1244,6 +1275,7 @@ static void enable_iommus(void)
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
 		iommu_enable(iommu);
+		iommu_flush_all_caches(iommu);
 	}
 }
 
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void)
 	 * we have to flush after the IOMMUs are enabled because a
 	 * disabled IOMMU will never execute the commands we send
 	 */
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
+	for_each_iommu(iommu)
+		iommu_flush_all_caches(iommu);
 }
 
 static int amd_iommu_suspend(void)
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index cd1ffed4ee22..289e92862fd9 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = {
 	.rating		= APBT_CLOCKSOURCE_RATING,
 	.read		= apbt_read_clocksource,
 	.mask		= APBT_MASK,
-	.shift		= APBT_SHIFT,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= apbt_restart_clocksource,
 };
@@ -543,14 +542,7 @@ static int apbt_clocksource_register(void)
 	if (t1 == apbt_read_clocksource(&clocksource_apbt))
 		panic("APBT counter not counting. APBT disabled\n");
 
-	/*
-	 * initialize and register APBT clocksource
-	 * convert that to ns/clock cycle
-	 * mult = (ns/c) * 2^APBT_SHIFT
-	 */
-	clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
-				       (unsigned long) apbt_freq, APBT_SHIFT);
-	clocksource_register(&clocksource_apbt);
+	clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 86d1ad4962a7..3d2661ca6542 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,22 @@
 #include <asm/amd_nb.h>
 #include <asm/x86_init.h>
 
+/*
+ * Use 512M as the allocation goal, in case kexec loads a kernel_big
+ * that decompresses in place and could then overlap with the gart
+ * aperture that is still in use.
+ * Sequence:
+ * kernel_small
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kernel_small (gart area becomes e820_reserved)
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kernel_big (uncompressed size will be bigger than 64M or 128M)
+ * So don't place the gart iommu below 512M; leave that space for
+ * kernel code to be safe.
+ */
+#define GART_MIN_ADDR	(512ULL << 20)
+#define GART_MAX_ADDR	(1ULL   << 32)
+
 int gart_iommu_aperture;
 int gart_iommu_aperture_disabled __initdata;
 int gart_iommu_aperture_allowed __initdata;
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void)
 	 * memory. Unfortunately we cannot move it up because that would
 	 * make the IOMMU useless.
 	 */
-	/*
-	 * using 512M as goal, in case kexec will load kernel_big
-	 * that will do the on position decompress, and  could overlap with
-	 * that position with gart that is used.
-	 * sequende:
-	 * kernel_small
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kernel_small(gart area become e820_reserved)
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
-	 * so don't use 512M below as gart iommu, leave the space for kernel
-	 * code for safe
-	 */
-	addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
-	if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
+				      aper_size, aper_size);
+	if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%lx,%uK)\n",
 				addr, aper_size>>10);
@@ -499,7 +503,7 @@ out:
 		 * Don't enable translation yet but enable GART IO and CPU
 		 * accesses and set DISTLBWALKPRB since GART table memory is UC.
 		 */
-		u32 ctl = DISTLBWALKPRB | aper_order << 1;
+		u32 ctl = aper_order << 1;
 
 		bus = amd_nb_bus_dev_ranges[i].bus;
 		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01eff771..f92a8e5d1e21 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void)
 	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
+
+	/*
+	 * Some NUMA implementations (NUMAQ) don't initialize apicid to
+	 * node mapping during NUMA init.  Now that logical apicid is
+	 * guaranteed to be known, give it another chance.  This is already
+	 * a bit too late - percpu allocation has already happened without
+	 * proper NUMA affinity.
+	 */
+	if (apic->x86_32_numa_cpu_node)
+		set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+				   apic->x86_32_numa_cpu_node(cpu));
 #endif
 
 	/*
@@ -1812,30 +1823,41 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v, v1;
+	u32 v0, v1;
+	u32 i = 0;
+	static const char * const error_interrupt_reason[] = {
+		"Send CS error",		/* APIC Error Bit 0 */
+		"Receive CS error",		/* APIC Error Bit 1 */
+		"Send accept error",		/* APIC Error Bit 2 */
+		"Receive accept error",		/* APIC Error Bit 3 */
+		"Redirectable IPI",		/* APIC Error Bit 4 */
+		"Send illegal vector",		/* APIC Error Bit 5 */
+		"Received illegal vector",	/* APIC Error Bit 6 */
+		"Illegal register address",	/* APIC Error Bit 7 */
+	};
 
 	exit_idle();
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
+	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
 	v1 = apic_read(APIC_ESR);
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	/*
-	 * Here is what the APIC error bits mean:
-	 * 0: Send CS error
-	 * 1: Receive CS error
-	 * 2: Send accept error
-	 * 3: Receive accept error
-	 * 4: Reserved
-	 * 5: Send illegal vector
-	 * 6: Received illegal vector
-	 * 7: Illegal register address
-	 */
-	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
+		    smp_processor_id(), v0 , v1);
+
+	v1 = v1 & 0xff;
+	while (v1) {
+		if (v1 & 0x1)
+			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+		i++;
+		v1 >>= 1;
+	};
+
+	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+
 	irq_exit();
 }
 
@@ -2003,21 +2025,6 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-#ifdef CONFIG_X86_32
-int default_x86_32_numa_cpu_node(int cpu)
-{
-#ifdef CONFIG_NUMA
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-#else
-	return 0;
-#endif
-}
-#endif
-
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f1baa2dc087a..775b82bc655c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
-#ifdef CONFIG_X86_32
-static int noop_x86_32_numa_cpu_node(int cpu)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-#endif
-
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -195,6 +187,5 @@ struct apic apic_noop = {
 
 #ifdef CONFIG_X86_32
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= noop_x86_32_numa_cpu_node,
 #endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 541a2e431659..d84ac5a584b5 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3e9de4854c5b..70533de5bd29 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_numa_cpu_node(int cpu)
-{
-	return 0;
-}
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92e..45fd33d1fd3a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr)
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 6273eee5134b..30f13319e24b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -48,8 +48,6 @@
 #include <asm/e820.h>
 #include <asm/ipi.h>
 
-#define	MB_TO_PAGES(addr)		((addr) << (20 - PAGE_SHIFT))
-
 int found_numaq;
 
 /*
@@ -79,31 +77,20 @@ int					quad_local_to_mp_bus_id[NR_CPUS/4][4];
 static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 {
 	struct eachquadmem *eq = scd->eq + node;
+	u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
+	u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
+	int ret;
 
-	node_set_online(node);
-
-	/* Convert to pages */
-	node_start_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
-
-	node_end_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
-
-	memblock_x86_register_active_regions(node, node_start_pfn[node],
-						node_end_pfn[node]);
-
-	memory_present(node, node_start_pfn[node], node_end_pfn[node]);
-
-	node_remap_size[node] = node_memmap_size_bytes(node,
-					node_start_pfn[node],
-					node_end_pfn[node]);
+	node_set(node, numa_nodes_parsed);
+	ret = numa_add_memblk(node, start, end);
+	BUG_ON(ret < 0);
 }
 
 /*
  * Function: smp_dump_qct()
  *
  * Description: gets memory layout from the quad config table.  This
- * function also updates node_online_map with the nodes (quads) present.
+ * function also updates numa_nodes_parsed with the nodes (quads) present.
  */
 static void __init smp_dump_qct(void)
 {
@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void)
 
 	scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
 
-	nodes_clear(node_online_map);
 	for_each_node(node) {
 		if (scd->quads_present31_0 & (1 << node))
 			numaq_register_node(node, scd);
@@ -282,14 +268,14 @@ static __init void early_check_numaq(void)
 	}
 }
 
-int __init get_memcfg_numaq(void)
+int __init numaq_numa_init(void)
 {
 	early_check_numaq();
 	if (!found_numaq)
-		return 0;
+		return -ENOENT;
 	smp_dump_qct();
 
-	return 1;
+	return 0;
 }
 
 #define NUMAQ_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fc84c7b61108..6541e471fd91 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -172,7 +172,6 @@ struct apic apic_default = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index e4b8059b414a..35bcd7d995a1 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -551,5 +551,4 @@ struct apic apic_summit = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* The BMC sets a bit in this MMR before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
+	.notifier_call	= uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;
 
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
 			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0b4be431c620..3bfa02235965 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
 #include <linux/acpi.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -1237,7 +1238,7 @@ static int suspend(int vetoable)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 
 	local_irq_enable();
 
@@ -1255,7 +1256,7 @@ static int suspend(int vetoable)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
 
-	sysdev_resume();
+	syscore_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
@@ -1279,7 +1280,7 @@ static void standby(void)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 	local_irq_enable();
 
 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1287,7 +1288,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
-	sysdev_resume();
+	syscore_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3f0ebe429a01..6042981d0309 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
-obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3ecece0217ef..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,8 +613,27 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
 	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
+
+	/*
+	 * Disable GART TLB Walk Errors on Fam10h. We do this here
+	 * because this is always needed when GART is enabled, even in a
+	 * kernel which has no MCE support built in.
+	 */
+	if (c->x86 == 0x10) {
+		/*
+		 * BIOS should disable GartTlbWlk Errors itself. If
+		 * it doesn't, do it here as suggested by the BKDG.
+		 *
+		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+		 */
+		u64 mask;
+
+		rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		mask |= (1 << 10);
+		wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+	}
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2ced0074a45..cbc70a27430c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
+static int disable_smep __initdata;
+static __init int setup_disable_smep(char *arg)
+{
+	disable_smep = 1;
+	return 1;
+}
+__setup("nosmep", setup_disable_smep);
+
+static __init void setup_smep(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (unlikely(disable_smep)) {
+			setup_clear_cpu_cap(X86_FEATURE_SMEP);
+			clear_in_cr4(X86_CR4_SMEP);
+		} else
+			set_in_cr4(X86_CR4_SMEP);
+	}
+}
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
@@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
 
-		if (eax > 0)
-			c->x86_capability[9] = ebx;
+		c->x86_capability[9] = ebx;
 	}
 
 	/* AMD-defined flags: level 0x80000001 */
@@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->cpu_index = 0;
 #endif
 	filter_cpuid_features(c, false);
+
+	setup_smep(c);
 }
 
 void __init early_cpu_init(void)
@@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 #endif
 	}
 
+	setup_smep(c);
+
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
deleted file mode 100644
index bd54bf67e6fb..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
-# K8 systems. ACPI is preferred to all other hardware-specific drivers.
-# speedstep-* is preferred over p4-clockmod.
-
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
-obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
-obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
-obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
-obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
-obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
-obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
-obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
-obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
-obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
-obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
-obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
-obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
-obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
-obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859d..1edf5ba4fb2b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,10 +29,10 @@
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
+	u64 misc_enable;
+
 	/* Unmask CPUID levels if masked: */
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * (model 2) with the same problem.
 	 */
 	if (c->x86 == 15) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		}
 	}
 #endif
+
+	/*
+	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
+	 * clear the fast string and enhanced fast string CPU capabilities.
+	 */
+	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
+			printk(KERN_INFO "Disabled fast string operations\n");
+			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
+			setup_clear_cpu_cap(X86_FEATURE_ERMS);
+		}
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 		switch (c->x86_model) {
 		case 5:
-			if (c->x86_mask == 0) {
-				if (l2 == 0)
-					p = "Celeron (Covington)";
-				else if (l2 == 256)
-					p = "Mobile Pentium II (Dixon)";
-			}
+			if (l2 == 0)
+				p = "Celeron (Covington)";
+			else if (l2 == 256)
+				p = "Mobile Pentium II (Dixon)";
 			break;
 
 		case 6:
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1ce1af2899df..c105c533ed94 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 
-	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
@@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
 {
 	int ret = 0;
 
-#define SUBCACHE_MASK	(3UL << 20)
-#define SUBCACHE_INDEX	0xfff
-
-	/*
-	 * check whether this slot is already used or
-	 * the index is already disabled
-	 */
+	/* check if @slot is already used or the index is already disabled */
 	ret = amd_get_l3_disable_slot(l3, slot);
 	if (ret >= 0)
 		return -EINVAL;
 
-	/*
-	 * check whether the other slot has disabled the
-	 * same index already
-	 */
-	if (index == amd_get_l3_disable_slot(l3, !slot))
+	if (index > l3->indices)
 		return -EINVAL;
 
-	/* do not allow writes outside of allowed bits */
-	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
-	    ((index & SUBCACHE_INDEX) > l3->indices))
+	/* check whether the other slot has disabled the same index already */
+	if (index == amd_get_l3_disable_slot(l3, !slot))
 		return -EINVAL;
 
 	amd_l3_disable_index(l3, cpu, slot, index);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3385ea26f684..ff1ae9b6464d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,20 +105,6 @@ static int			cpu_missing;
 ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
 
-static int default_decode_mce(struct notifier_block *nb, unsigned long val,
-			       void *data)
-{
-	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
-	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block mce_dec_nb = {
-	.notifier_call = default_decode_mce,
-	.priority      = -1,
-};
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce)
 
 static void print_mce(struct mce *m)
 {
+	int ret = 0;
+
 	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m)
 	 * Print out human-readable details about the MCE error,
 	 * (if the CPU has an implementation for that)
 	 */
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
 			mce_log(&m);
 			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
-			add_taint(TAINT_MACHINE_CHECK);
 		}
 
 		/*
@@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable);
 
 int __init mcheck_init(void)
 {
-	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
-
 	mcheck_intel_therm_init();
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..27c625178bf1 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-
-		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
 	if (old_event) {
@@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val)
 static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
-	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
@@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN))
 		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL) != 0)
 			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PTS)) {
+	if (this_cpu_has(X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL) != 0)
 			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN))
 			if (therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
 			smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If the BIOS has taken over the thermal interrupt and set its
+	 * delivery mode to something other than fixed (e.g. SMI), restore on
+	 * the AP the value the BIOS programmed, using the BSP's copy we saved,
+	 * since the BIOS always sets the same value for all threads/cores.
 	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index eed3673a8656..3a0338b4b179 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 #include <asm/smp.h>
+#include <asm/alternative.h>
 
 #if 0
 #undef wrmsrl
@@ -363,12 +364,18 @@ again:
 	return new_raw_count;
 }
 
-/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
 static inline int x86_pmu_addr_offset(int index)
 {
-	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-		return index << 1;
-	return index;
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a"  (index));
+
+	return offset;
 }
 
 static inline unsigned int x86_pmu_config_addr(int index)
@@ -586,8 +593,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
@@ -609,8 +620,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -1284,6 +1295,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to be sensitive to where the
+	 * unmasking occurs, so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1370,8 +1391,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
@@ -1754,17 +1773,6 @@ static struct pmu pmu = {
  * callchain support
  */
 
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	return 0;
@@ -1778,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
 	.stack			= backtrace_stack,
 	.address		= backtrace_address,
 	.walk_stack		= print_context_stack_bp,
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 461f62bbd774..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
+  [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
 };
 
 static u64 amd_pmu_event_map(int hw_event)
@@ -427,7 +429,9 @@ static __initconst const struct x86_pmu amd_pmu = {
  *
  * Exceptions:
  *
+ * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x003	FP	PERF_CTL[3]
+ * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x00B	FP	PERF_CTL[3]
  * 0x00D	FP	PERF_CTL[3]
  * 0x023	DE	PERF_CTL[2:0]
@@ -448,6 +452,8 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DF	LS	PERF_CTL[5:0]
  * 0x1D6	EX	PERF_CTL[5:0]
  * 0x1D8	EX	PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
  */
 
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +466,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	unsigned int event_code = amd_get_event_code(&event->hw);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int event_code = amd_get_event_code(hwc);
 
 	switch (event_code & AMD_EVENT_TYPE_MASK) {
 	case AMD_EVENT_FP:
 		switch (event_code) {
+		case 0x000:
+			if (!(hwc->config & 0x0000F000ULL))
+				break;
+			if (!(hwc->config & 0x00000F00ULL))
+				break;
+			return &amd_f15_PMC3;
+		case 0x004:
+			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+				break;
+			return &amd_f15_PMC3;
 		case 0x003:
 		case 0x00B:
 		case 0x00D:
 			return &amd_f15_PMC3;
-		default:
-			return &amd_f15_PMC53;
 		}
+		return &amd_f15_PMC53;
 	case AMD_EVENT_LS:
 	case AMD_EVENT_DC:
 	case AMD_EVENT_EX_LS:
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..41178c826c48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
   [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -36,7 +36,7 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_core2_event_constraints[] =
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_nehalem_extra_regs[] =
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] =
+static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_westmere_event_constraints[] =
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_snb_event_constraints[] =
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_westmere_extra_regs[] =
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] =
+static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	INTEL_EVENT_CONSTRAINT(0xbb, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_gen_event_constraints[] =
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };
 
 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
 
-#define DMND_DATA_RD     (1 << 0)
-#define DMND_RFO         (1 << 1)
-#define DMND_WB          (1 << 3)
-#define PF_DATA_RD       (1 << 4)
-#define PF_DATA_RFO      (1 << 5)
-#define RESP_UNCORE_HIT  (1 << 8)
-#define RESP_MISS        (0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+				/* bit 11 is reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
  }
 };
@@ -391,12 +408,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
 
+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *       usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * 	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1439,23 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1425,6 +1472,7 @@ static __init int intel_pmu_init(void)
 
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
+	case 47: /* 32 nm Xeon E7 */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -1437,6 +1485,12 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
 		pr_cont("Westmere events, ");
 		break;
 
@@ -1448,6 +1502,12 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+
 		pr_cont("SandyBridge events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index c2520e178d32..ead584fb6a7d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.escr_emask	=
-		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_X87_ASSIST] = {
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -947,14 +946,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 		if (perf_event_overflow(event, 1, &data, regs))
-			p4_pmu_disable_event(event);
+			x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is that the NMI line will continue to be asserted while
+	 * the OVF bit is set.  This causes a second NMI to be generated if the
+	 * LVTPC is unmasked before the OVF bit is cleared, leading to unknown
+	 * NMI messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }
@@ -1188,7 +1196,7 @@ static __init int p4_pmu_init(void)
 {
 	unsigned int low, high;
 
-	/* If we get stripped -- indexig fails */
+	/* If we get stripped -- indexing fails */
 	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a58..e90f08458e6b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
 
 	set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
 
-	return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+	return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
 }
 
 static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da4..1aae78f775fc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 }
 EXPORT_SYMBOL_GPL(print_context_stack_bp);
 
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
 static int print_trace_stack(void *data, char *name)
 {
 	printk("%s <%s> ", (char *)data, name);
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
-	.warning		= print_trace_warning,
-	.warning_symbol		= print_trace_warning_symbol,
 	.stack			= print_trace_stack,
 	.address		= print_trace_address,
 	.walk_stack		= print_context_stack,
@@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-	sysfs_printk_last_file();
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index a93742a57468..0ba15a6cc57e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	return mod_code_status;
 }
 
-static unsigned char *ftrace_nop_replace(void)
+static const unsigned char *ftrace_nop_replace(void)
 {
-	return ideal_nop5;
+	return ideal_nops[NOP_ATOMIC5];
 }
 
 static int
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index d6d6bb361931..3bb08509a7a1 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -23,7 +23,6 @@
 static void __init i386_default_early_setup(void)
 {
 	/* Initialize 32bit specific setup functions */
-	x86_init.resources.probe_roms = probe_roms;
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index bfe8f729e086..6781765b3a0d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
 /*
  * Common hpet info
  */
-static unsigned long hpet_period;
+static unsigned long hpet_freq;
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt);
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= hpet_legacy_set_mode,
 	.set_next_event = hpet_legacy_next_event,
-	.shift		= 32,
 	.irq		= 0,
 	.rating		= 50,
 };
@@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void)
 	hpet_enable_legacy_int();
 
 	/*
-	 * The mult factor is defined as (include/linux/clockchips.h)
-	 *  mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
-	 * hpet_period is in units of femtoseconds (per cycle), so
-	 *  mult/2^shift = cyc/ns = 10^6/hpet_period
-	 *  mult = (10^6 * 2^shift)/hpet_period
-	 *  mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
-	 */
-	hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
-				      hpet_period, hpet_clockevent.shift);
-	/* Calculate the min / max delta */
-	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
-							   &hpet_clockevent);
-	/* Setup minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
-							   &hpet_clockevent);
-
-	/*
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
 	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
-	clockevents_register_device(&hpet_clockevent);
+	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
+					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 {
 	struct clock_event_device *evt = &hdev->evt;
-	uint64_t hpet_freq;
 
 	WARN_ON(cpu != smp_processor_id());
 	if (!(hdev->flags & HPET_DEV_VALID))
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 
 	evt->set_mode = hpet_msi_set_mode;
 	evt->set_next_event = hpet_msi_next_event;
-	evt->shift = 32;
-
-	/*
-	 * The period is a femto seconds value. We need to calculate the
-	 * scaled math multiplication factor for nanosecond to hpet tick
-	 * conversion.
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
-	evt->mult = div_sc((unsigned long) hpet_freq,
-				      NSEC_PER_SEC, evt->shift);
-	/* Calculate the max delta */
-	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
-	/* 5 usec minimum reprogramming delta. */
-	evt->min_delta_ns = 5000;
-
 	evt->cpumask = cpumask_of(hdev->cpu);
-	clockevents_register_device(evt);
+
+	clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
+					0x7FFFFFFF);
 }
 
 #ifdef CONFIG_HPET
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = {
 static int hpet_clocksource_register(void)
 {
 	u64 start, now;
-	u64 hpet_freq;
 	cycle_t t1;
 
 	/* Start the counter */
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void)
 		return -ENODEV;
 	}
 
-	/*
-	 * The definition of mult is (include/linux/clocksource.h)
-	 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
-	 * so we first need to convert hpet_period to ns/cyc units:
-	 *  mult/2^shift = ns/cyc = hpet_period/10^6
-	 *  mult = (hpet_period * 2^shift)/10^6
-	 *  mult = (hpet_period << shift)/FSEC_PER_NSEC
-	 */
-
-	/* Need to convert hpet_period (fsec/cyc) to cyc/sec:
-	 *
-	 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc)
-	 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
 	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
-
 	return 0;
 }
 
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void)
  */
 int __init hpet_enable(void)
 {
+	unsigned long hpet_period;
 	unsigned int id;
+	u64 freq;
 	int i;
 
 	if (!is_hpet_capable())
@@ -884,6 +836,14 @@ int __init hpet_enable(void)
 		goto out_nohpet;
 
 	/*
+	 * The period is a femto seconds value. Convert it to a
+	 * frequency.
+	 */
+	freq = FSEC_PER_SEC;
+	do_div(freq, hpet_period);
+	hpet_freq = freq;
+
+	/*
 	 * Read the HPET ID register to retrieve the IRQ routing
 	 * information and the number of channels
 	 */
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd31597443..fb66dc9e36cb 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= init_pit_timer,
 	.set_next_event = pit_next_event,
-	.shift		= 32,
 	.irq		= 0,
 };
 
@@ -108,90 +107,12 @@ void __init setup_pit_timer(void)
 	 * IO_APIC has been initialized.
 	 */
 	pit_ce.cpumask = cpumask_of(smp_processor_id());
-	pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
-	pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
-	pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
 
-	clockevents_register_device(&pit_ce);
+	clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF);
 	global_clock_event = &pit_ce;
 }
 
 #ifndef CONFIG_X86_64
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	static int old_count;
-	static u32 old_jifs;
-	unsigned long flags;
-	int count;
-	u32 jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_pit(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_pit(PIT_CH0);	/* read the latched count */
-	count |= inb_pit(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_pit(0x34, PIT_MODE);
-		outb_pit(LATCH & 0xff, PIT_CH0);
-		outb_pit(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= 0,
-	.shift		= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	 /*
@@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void)
 	    pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
-
-	return clocksource_register(&pit_cs);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
-
 #endif /* !CONFIG_X86_64 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1cb0b9fc78dc..6c0802eb2f7f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -249,7 +249,7 @@ void fixup_irqs(void)
 
 		data = irq_desc_get_irq_data(desc);
 		affinity = data->affinity;
-		if (!irq_has_action(irq) ||
+		if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
 		    cpumask_subset(affinity, cpu_online_mask)) {
 			raw_spin_unlock(&desc->lock);
 			continue;
@@ -276,7 +276,8 @@ void fixup_irqs(void)
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+		if (!irqd_can_move_in_process_context(data) &&
+		    !irqd_irq_disabled(data) && chip->irq_unmask)
 			chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 961b6b30ba90..3fee346ef545 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 		code.offset = entry->target -
 				(entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
 	get_online_cpus();
 	mutex_lock(&text_mutex);
 	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry,
 
 void arch_jump_label_text_poke_early(jump_label_t addr)
 {
-	text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
+			JUMP_LABEL_NOP_SIZE);
 }
 
 #endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d1566..f1a6244d7d93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 					 struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
 
 	/* This is possible if op is under delayed unoptimizing */
 	if (kprobe_disabled(&op->kp))
 		return;
 
-	preempt_disable();
+	local_irq_save(flags);
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(&op->kp);
 	} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 		opt_pre_handler(&op->kp, regs);
 		__this_cpu_write(current_kprobe, NULL);
 	}
-	preempt_enable_no_resched();
+	local_irq_restore(flags);
 }
 
 static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f98d3eafe07a..6389a6bca11b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -26,8 +26,6 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 
-#define KVM_SCALE 22
-
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = {
 	.read = kvm_clock_get_cycles,
 	.rating = 400,
 	.mask = CLOCKSOURCE_MASK(64),
-	.mult = 1 << KVM_SCALE,
-	.shift = KVM_SCALE,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -203,7 +199,7 @@ void __init kvmclock_init(void)
 	machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
 	kvm_get_preset_lpj();
-	clocksource_register(&kvm_clock);
+	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index ab23f1ad4bf1..52f256f2cc81 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/jump_label.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5a532ce646bf..6f9bfffb2720 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
 	}
 }
 
-static int
+static int __init
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	int ret = 0;
-
 	if (!mpc_new_phys || count <= mpc_new_length) {
 		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
 		return -1;
 	}
 
-	return ret;
+	return 0;
 }
 #else /* CONFIG_X86_IO_APIC */
 static
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec1181..35ccf75696eb 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 				struct iommu_table_entry *finish)
 {
 	struct iommu_table_entry *p, *q, *x;
-	char sym_p[KSYM_SYMBOL_LEN];
-	char sym_q[KSYM_SYMBOL_LEN];
 
 	/* Simple cyclic dependency checker. */
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(start, finish, p);
 		x = find_dependents_of(start, finish, q);
 		if (p == x) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
-					" on %s and vice-versa. BREAKING IT.\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
+			       p->detect, q->detect);
 			/* Heavy handed way..*/
 			x->depend = 0;
 		}
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(p, finish, p);
 		if (q && q > p) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
-					"should be called before %s!\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
+			       p->detect, q->detect);
 		}
 	}
 }
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c
index 071e7fea42e5..ba0a4cce53be 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -73,6 +73,107 @@ static struct resource video_rom_resource = {
 	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
 };
 
+/* does this oprom support the given pci device, or any of the devices
+ * that the driver supports?
+ */
+static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
+{
+	struct pci_driver *drv = pdev->driver;
+	const struct pci_device_id *id;
+
+	if (pdev->vendor == vendor && pdev->device == device)
+		return true;
+
+	for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
+		if (id->vendor == vendor && id->device == device)
+			break;
+
+	return id && id->vendor;
+}
+
+static bool probe_list(struct pci_dev *pdev, unsigned short vendor,
+		       const unsigned char *rom_list)
+{
+	unsigned short device;
+
+	do {
+		if (probe_kernel_address(rom_list, device) != 0)
+			device = 0;
+
+		if (device && match_id(pdev, vendor, device))
+			break;
+
+		rom_list += 2;
+	} while (device);
+
+	return !!device;
+}
+
+static struct resource *find_oprom(struct pci_dev *pdev)
+{
+	struct resource *oprom = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
+		struct resource *res = &adapter_rom_resources[i];
+		unsigned short offset, vendor, device, list, rev;
+		const unsigned char *rom;
+
+		if (res->end == 0)
+			break;
+
+		rom = isa_bus_to_virt(res->start);
+		if (probe_kernel_address(rom + 0x18, offset) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x4, vendor) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x6, device) != 0)
+			continue;
+
+		if (match_id(pdev, vendor, device)) {
+			oprom = res;
+			break;
+		}
+
+		if (probe_kernel_address(rom + offset + 0x8, list) == 0 &&
+		    probe_kernel_address(rom + offset + 0xc, rev) == 0 &&
+		    rev >= 3 && list &&
+		    probe_list(pdev, vendor, rom + offset + list)) {
+			oprom = res;
+			break;
+		}
+	}
+
+	return oprom;
+}
+
+void *pci_map_biosrom(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	if (!oprom)
+		return NULL;
+
+	return ioremap(oprom->start, resource_size(oprom));
+}
+EXPORT_SYMBOL(pci_map_biosrom);
+
+void pci_unmap_biosrom(void __iomem *image)
+{
+	iounmap(image);
+}
+EXPORT_SYMBOL(pci_unmap_biosrom);
+
+size_t pci_biosrom_size(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	return oprom ? resource_size(oprom) : 0;
+}
+EXPORT_SYMBOL(pci_biosrom_size);
+
 #define ROMSIGNATURE 0xaa55
 
 static int __init romsignature(const unsigned char *rom)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d46cbe46b7ab..88a90a977f8e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
 	if (!need_resched()) {
 		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		trace_cpu_idle(1, smp_processor_id());
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72a..f65e5b521dbd 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	unsigned len, type;
 	struct perf_event *bp;
 
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
+
 	data &= ~DR_CONTROL_RESERVED;
 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
@@ -655,6 +658,9 @@ restore:
 		}
 		goto restore;
 	}
+
+	ptrace_put_breakpoints(tsk);
+
 	return ((orig_ret < 0) ? orig_ret : rc);
 }
 
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 
 	if (n < HBP_NUM) {
 		struct perf_event *bp;
+
+		if (ptrace_get_breakpoints(tsk) < 0)
+			return -ESRCH;
+
 		bp = thread->ptrace_bps[n];
 		if (!bp)
-			return 0;
-		val = bp->hw.info.address;
+			val = 0;
+		else
+			val = bp->hw.info.address;
+
+		ptrace_put_breakpoints(tsk);
 	} else if (n == 6) {
 		val = thread->debugreg6;
 	 } else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
 	struct perf_event_attr attr;
+	int err = 0;
+
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
 
 	if (!t->ptrace_bps[nr]) {
 		ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		 * writing for the user. And anyway this is the previous
 		 * behaviour.
 		 */
-		if (IS_ERR(bp))
-			return PTR_ERR(bp);
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto put;
+		}
 
 		t->ptrace_bps[nr] = bp;
 	} else {
-		int err;
-
 		bp = t->ptrace_bps[nr];
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
 		err = modify_user_hw_breakpoint(bp, &attr);
-		if (err)
-			return err;
 	}
 
-
-	return 0;
+put:
+	ptrace_put_breakpoints(tsk);
+	return err;
 }
 
 /*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 08c44b08bf5b..0c016f727695 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
 
+/*
+ * Windows compatible x86 hardware expects the following on reboot:
+ *
+ * 1) If the FADT has the ACPI reboot register flag set, try it
+ * 2) If still alive, write to the keyboard controller
+ * 3) If still alive, write to the ACPI reboot register again
+ * 4) If still alive, write to the keyboard controller again
+ *
+ * If the machine is still alive at this stage, it gives up. We default to
+ * following the same pattern, except that if we're still alive after (4) we'll
+ * try to force a triple fault and then cycle between hitting the keyboard
+ * controller and forcing a triple fault.
+ */
 static void native_machine_emergency_restart(void)
 {
 	int i;
+	int attempt = 0;
+	int orig_reboot_type = reboot_type;
 
 	if (reboot_emergency)
 		emergency_vmx_disable_all();
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void)
 				outb(0xfe, 0x64); /* pulse reset low */
 				udelay(50);
 			}
+			if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
+				attempt = 1;
+				reboot_type = BOOT_ACPI;
+			} else {
+				reboot_type = BOOT_TRIPLE;
+			}
+			break;
 
 		case BOOT_TRIPLE:
 			load_idt(&no_idt);
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d816..1d5c46df0d78 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
 	/* Get our own relocated address */
 	call	1f
 1:	popl	%ebx
-	subl	$1b, %ebx
+	subl	$(1b - r_base), %ebx
 
 	/* Compute the equivalent real-mode segment */
 	movl	%ebx, %ecx
 	shrl	$4, %ecx
 	
 	/* Patch post-real-mode segment jump */
-	movw	dispatch_table(%ebx,%eax,2),%ax
-	movw	%ax, 101f(%ebx)
-	movw	%cx, 102f(%ebx)
+	movw	(dispatch_table - r_base)(%ebx,%eax,2),%ax
+	movw	%ax, (101f - r_base)(%ebx)
+	movw	%cx, (102f - r_base)(%ebx)
 
 	/* Set up the IDT for real mode. */
-	lidtl	machine_real_restart_idt(%ebx)
+	lidtl	(machine_real_restart_idt - r_base)(%ebx)
 
 	/*
 	 * Set up a GDT from which we can load segment descriptors for real
 	 * mode.  The GDT is not used in real mode; it is just needed here to
 	 * prepare the descriptors.
 	 */
-	lgdtl	machine_real_restart_gdt(%ebx)
+	lgdtl	(machine_real_restart_gdt - r_base)(%ebx)
 
 	/*
 	 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4be9b398470e..c3050af9306d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
-	unsigned long flags;
-
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -1041,9 +1039,7 @@ void __init setup_arch(char **cmdline_p)
 
 	mcheck_init();
 
-	local_irq_save(flags);
-	arch_init_ideal_nop5();
-	local_irq_restore(flags);
+	arch_init_ideal_nops();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4fd173cd8e57..40a24932a8a1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 		goto badframe;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
@@ -682,6 +679,7 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	      sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, sig);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac7228d..013e7eba83bb 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
 }
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2871d3c71b6..a3c430bdfb60 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
+	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
 		return;
-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;
 	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
 		return;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6515733a289d..55d9bc03f696 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -9,15 +9,6 @@
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
-static void save_stack_warning(void *data, char *msg)
-{
-}
-
-static void
-save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-}
-
 static int save_stack_stack(void *data, char *name)
 {
 	return 0;
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops save_stack_ops = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address,
 	.walk_stack	= print_context_stack,
 };
 
 static const struct stacktrace_ops save_stack_ops_nosched = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address_nosched,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128b..6f164bd5e14d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { }
 struct x86_init_ops x86_init __initdata = {
 
 	.resources = {
-		.probe_roms		= x86_init_noop,
+		.probe_roms		= probe_roms,
 		.reserve_resources	= reserve_standard_io_resources,
 		.memory_setup		= default_machine_specific_memory_setup,
 	},
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
 		.banner			= default_banner,
 	},
 
+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
 	.paging = {
 		.pagetable_setup_start	= native_pagetable_setup_start,
 		.pagetable_setup_done	= native_pagetable_setup_done,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cd608973ce5..e191c096ab90 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -7,7 +7,7 @@
  * kernel and insert a module (lg.ko) which allows us to run other Linux
  * kernels the same way we'd run processes.  We call the first kernel the Host,
  * and the others the Guests.  The program which sets up and configures Guests
- * (such as the example in Documentation/lguest/lguest.c) is called the
+ * (such as the example in Documentation/virtual/lguest/lguest.c) is called the
  * Launcher.
  *
  * Secondly, we only run specially modified Guests, not normal kernels: setting
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = {
 	.rating		= 200,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1 << 22,
-	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -997,7 +995,7 @@ static void lguest_time_init(void)
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
 	irq_set_handler(0, lguest_time_irq);
 
-	clocksource_register(&lguest_clock);
+	clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24a..f2145cfa12a6 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Zero a page. 	
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
 	CFI_ENDPROC
 ENDPROC(clear_page_c)
 
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
+
 ENTRY(clear_page)
 	CFI_STARTPROC
 	xorl   %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
 .Lclear_page_end:
 ENDPROC(clear_page)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
+	 * It is recommended to use this when possible.
+	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+	 * Otherwise, use original function.
+	 *
+	 */
 
 #include <asm/cpufeature.h>
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:
+2:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
+3:
 	.previous
 	.section .altinstructions,"a"
-	.align 8
-	.quad clear_page
-	.quad 1b
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lclear_page_end - clear_page
-	.byte 2b - 1b
+	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+			     .Lclear_page_end-clear_page, 2b-1b
+	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
+			     .Lclear_page_end-clear_page,3b-2b
 	.previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e482615195..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
-	.macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 0:
 	.byte 0xe9	/* 32bit jump */
 	.long \orig-1f	/* by default jump to orig */
 1:
 	.section .altinstr_replacement,"ax"
 2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt-1b /* offset */   /* or alternatively to alt */
+	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
+3:	.byte 0xe9			/* near jump with 32bit immediate */
+	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
 	.previous
+
 	.section .altinstructions,"a"
-	.align 8
-	.quad  0b
-	.quad  2b
-	.word  \feature			/* when feature is set */
-	.byte  5
-	.byte  5
+	altinstruction_entry 0b,2b,\feature1,5,5
+	altinstruction_entry 0b,3b,\feature2,5,5
 	.previous
 	.endm
 
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
 	addq %rdx,%rcx
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_to_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
 	addq %rdx,%rcx
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_from_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
 	.previous
 	CFI_ENDPROC
 ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+	CFI_STARTPROC
+	andl %edx,%edx
+	jz 2f
+	movl %edx,%ecx
+1:	rep
+	movsb
+2:	xorl %eax,%eax
+	ret
+
+	.section .fixup,"ax"
+12:	movl %ecx,%edx		/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
+
+	.section __ex_table,"a"
+	.align 8
+	.quad 1b,12b
+	.previous
+	CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..efbf2a0ecdea 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
 .Lmemcpy_e:
 	.previous
 
+/*
+ * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
+ * memcpy_c. Use memcpy_c_e when possible.
+ *
+ * This gets patched over the unrolled variant (below) via the
+ * alternative instructions framework:
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c_e:
+	movq %rdi, %rax		/* value left in %rax for the caller */
+
+	movq %rdx, %rcx		/* rep count: all 64 bits, so sizes >= 4GB work */
+	rep movsb
+	ret
+.Lmemcpy_e_e:
+	.previous
+
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
@@ -49,7 +67,7 @@ ENTRY(memcpy)
 	jb .Lhandle_tail
 
 	/*
-	 * We check whether memory false dependece could occur,
+	 * We check whether memory false dependence could occur,
 	 * then jump to corresponding copy mode.
 	 */
 	cmp  %dil, %sil
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
 	/*
-	 * Some CPUs run faster using the string copy instructions.
-	 * It is also a lot simpler. Use this when possible:
-	 */
-
-	.section .altinstructions, "a"
-	.align 8
-	.quad memcpy
-	.quad .Lmemcpy_c
-	.word X86_FEATURE_REP_GOOD
-
-	/*
+	 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature.
+	 * If the feature is supported, memcpy_c_e() is the first choice.
+	 * If enhanced rep movsb copy is not available, use fast string copy
+	 * memcpy_c() when possible. This is faster and code is simpler than
+	 * original memcpy().
+	 * Otherwise, original memcpy() is used.
+	 * In .altinstructions section, ERMS feature is placed after REP_GOOD
+	 * feature to implement the right patch order.
+	 *
 	 * Replace only beginning, memcpy is used to apply alternatives,
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte .Lmemcpy_e - .Lmemcpy_c
-	.byte .Lmemcpy_e - .Lmemcpy_c
+	.section .altinstructions, "a"
+	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
 	.previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a8..d0ec9c2936d7 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb	2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
 	 * Handle data backward by movsq.
 	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the original function as well.
  *	
  * rdi   destination
  * rsi   value (char) 
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9		/* save destination; rep stosb advances %rdi */
+	movb %sil,%al		/* byte value to store */
+	movq %rdx,%rcx		/* rep count: all 64 bits, so sizes >= 4GB work */
+	rep stosb
+	movq %r9,%rax		/* return the original destination */
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REP_GOOD
+	 * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608edf9958..3d11327c9ab4 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,8 +23,8 @@ mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
-obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
-obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o
+obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
+obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c
index 0919c26820d4..5247d01329ca 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <linux/pci_ids.h>
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
 
 int __init amd_numa_init(void)
 {
-	unsigned long start = PFN_PHYS(0);
-	unsigned long end = PFN_PHYS(max_pfn);
+	u64 start = PFN_PHYS(0);
+	u64 end = PFN_PHYS(max_pfn);
 	unsigned numnodes;
-	unsigned long prevbase;
+	u64 prevbase;
 	int i, j, nb;
 	u32 nodeid, reg;
 	unsigned int bits, cores, apicid_base;
@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
 
 	prevbase = 0;
 	for (i = 0; i < 8; i++) {
-		unsigned long base, limit;
+		u64 base, limit;
 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (nodeid >= numnodes) {
-			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+			pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
 				base, limit);
 			continue;
 		}
 
 		if (!limit) {
-			pr_info("Skipping node entry %d (base %lx)\n",
+			pr_info("Skipping node entry %d (base %Lx)\n",
 				i, base);
 			continue;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			pr_err("Node %d using interleaving mode %lx/%lx\n",
+			pr_err("Node %d using interleaving mode %Lx/%Lx\n",
 			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
 			return -EINVAL;
 		}
@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (limit < base) {
-			pr_err("Node %d bogus settings %lx-%lx.\n",
+			pr_err("Node %d bogus settings %Lx-%Lx.\n",
 			       nodeid, base, limit);
 			continue;
 		}
 
 		/* Could sort here, but pun for now. Should not happen anyroads. */
 		if (prevbase > base) {
-			pr_err("Node map not sorted %lx,%lx\n",
+			pr_err("Node map not sorted %Lx,%Lx\n",
 			       prevbase, base);
 			return -EINVAL;
 		}
 
-		pr_info("Node %d MemBase %016lx Limit %016lx\n",
+		pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
 			nodeid, base, limit);
 
 		prevbase = base;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f8702d1e..bcb394dfbb35 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
+#include <linux/prefetch.h>		/* prefetchw			*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039b..37b8b0fe8320 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
 struct map_range {
 	unsigned long start;
 	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	__flush_tlb_all();
 
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
 	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));
 
 	if (!after_bootmem)
 		early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 80088f994193..29f7c6d98179 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+#endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
@@ -716,6 +718,7 @@ void __init paging_init(void)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 	olpc_dt_build_devicetree();
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 	zone_sizes_init();
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 794233587287..d865c4aeec55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -616,7 +616,9 @@ void __init paging_init(void)
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0369843511dc..be1ef574ce9a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
 	/*
-	 * Check if the request spans more than any BAR in the iomem resource
-	 * tree.
-	 */
-	WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
-		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
-
-	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	last_pfn = last_addr >> PAGE_SHIFT;
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
+	/*
+	 * Check if the request spans more than any BAR in the iomem resource
+	 * tree.
+	 */
+	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
+		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+
 	return ret_addr;
 err_free_area:
 	free_vm_area(area);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9559d360fde7..f5510d889a22 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,11 +1,39 @@
 /* Common code for 32 and 64-bit NUMA */
-#include <linux/topology.h>
-#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <asm/numa.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/sched.h>
+#include <linux/topology.h>
+
+#include <asm/e820.h>
+#include <asm/proto.h>
+#include <asm/dma.h>
 #include <asm/acpi.h>
+#include <asm/amd_nb.h>
+
+#include "numa_internal.h"
 
 int __initdata numa_off;
+nodemask_t numa_nodes_parsed __initdata;
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
+static struct numa_meminfo numa_meminfo
+#ifndef CONFIG_MEMORY_HOTPLUG
+__initdata
+#endif
+;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
 
 static __init int numa_setup(char *opt)
 {
@@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
@@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+				     struct numa_meminfo *mi)
+{
+	/* ignore zero length blks */
+	if (start == end)
+		return 0;
+
+	/* whine about and ignore invalid blks */
+	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+			   nid, start, end);
+		return 0;
+	}
+
+	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+		pr_err("NUMA: too many memblk ranges\n");
+		return -EINVAL;
+	}
+
+	mi->blk[mi->nr_blks].start = start;
+	mi->blk[mi->nr_blks].end = end;
+	mi->blk[mi->nr_blks].nid = nid;
+	mi->nr_blks++;
+	return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+	mi->nr_blks--;
+	memmove(&mi->blk[idx], &mi->blk[idx + 1],
+		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start, u64 end)
+{
+	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
+	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
+	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
+	bool remapped = false;
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
+	/* initialize remap allocator before aligning to ZONE_ALIGN */
+	init_alloc_remap(nid, start, end);
+
+	start = roundup(start, ZONE_ALIGN);
+
+	printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
+	       nid, start, end);
+
+	/*
+	 * Allocate node data.  Try remap allocator first, node-local
+	 * memory and then any node.  Never allocate in DMA zone.
+	 */
+	nd = alloc_remap(nid, nd_size);
+	if (nd) {
+		nd_pa = __pa(nd);
+		remapped = true;
+	} else {
+		nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+		if (nd_pa == MEMBLOCK_ERROR)
+			nd_pa = memblock_find_in_range(nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+		if (nd_pa == MEMBLOCK_ERROR) {
+			pr_err("Cannot find %zu bytes in node %d\n",
+			       nd_size, nid);
+			return;
+		}
+		memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+		nd = __va(nd_pa);
+	}
+
+	/* report and initialize */
+	printk(KERN_INFO "  NODE_DATA [%016Lx - %016Lx]%s\n",
+	       nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (!remapped && tnid != nid)
+		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
+	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
+
+	node_set_online(nid);
+}
+
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unnecessary memblks.  Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
+{
+	const u64 low = 0;
+	const u64 high = PFN_PHYS(max_pfn);
+	int i, j, k;
+
+	/* first, trim all entries */
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		/* make sure all blocks are inside the limits */
+		bi->start = max(bi->start, low);
+		bi->end = min(bi->end, high);
+
+		/* and there's no empty block */
+		if (bi->start >= bi->end)
+			numa_remove_memblk_from(i--, mi);
+	}
+
+	/* merge neighboring / overlapping entries */
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		for (j = i + 1; j < mi->nr_blks; j++) {
+			struct numa_memblk *bj = &mi->blk[j];
+			u64 start, end;
+
+			/*
+			 * See whether there are overlapping blocks.  Whine
+			 * about but allow overlaps of the same nid.  They
+			 * will be merged below.
+			 */
+			if (bi->end > bj->start && bi->start < bj->end) {
+				if (bi->nid != bj->nid) {
+					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+					       bi->nid, bi->start, bi->end,
+					       bj->nid, bj->start, bj->end);
+					return -EINVAL;
+				}
+				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+					   bi->nid, bi->start, bi->end,
+					   bj->start, bj->end);
+			}
+
+			/*
+			 * Join together blocks on the same node, holes
+			 * between which don't overlap with memory on other
+			 * nodes.
+			 */
+			if (bi->nid != bj->nid)
+				continue;
+			start = min(bi->start, bj->start);
+			end = max(bi->end, bj->end);
+			for (k = 0; k < mi->nr_blks; k++) {
+				struct numa_memblk *bk = &mi->blk[k];
+
+				if (bi->nid == bk->nid)
+					continue;
+				if (start < bk->end && end > bk->start)
+					break;
+			}
+			if (k < mi->nr_blks)
+				continue;
+			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
+			       bi->nid, bi->start, bi->end, bj->start, bj->end,
+			       start, end);
+			bi->start = start;
+			bi->end = end;
+			numa_remove_memblk_from(j--, mi);
+		}
+	}
+
+	/* clear unused ones */
+	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+		mi->blk[i].start = mi->blk[i].end = 0;
+		mi->blk[i].nid = NUMA_NO_NODE;
+	}
+
+	return 0;
+}
+
+/*
+ * Set nodes, which have memory in @mi, in *@nodemask.
+ */
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+					      const struct numa_meminfo *mi)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+		if (mi->blk[i].start != mi->blk[i].end &&
+		    mi->blk[i].nid != NUMA_NO_NODE)
+			node_set(mi->blk[i].nid, *nodemask);
+}
+
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed.  The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
+
+	/* numa_distance could be 1LU marking allocation failure, test cnt */
+	if (numa_distance_cnt)
+		memblock_x86_free_range(__pa(numa_distance),
+					__pa(numa_distance) + size);
+	numa_distance_cnt = 0;
+	numa_distance = NULL;	/* enable table creation */
+}
+
+static int __init numa_alloc_distance(void)
+{
+	nodemask_t nodes_parsed;
+	size_t size;
+	int i, j, cnt = 0;
+	u64 phys;
+
+	/* size the new table and allocate it */
+	nodes_parsed = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
+
+	for_each_node_mask(i, nodes_parsed)
+		cnt = i;
+	cnt++;
+	size = cnt * cnt * sizeof(numa_distance[0]);
+
+	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
+				      size, PAGE_SIZE);
+	if (phys == MEMBLOCK_ERROR) {
+		pr_warning("NUMA: Warning: can't allocate distance table!\n");
+		/* don't retry until explicitly reset */
+		numa_distance = (void *)1LU;
+		return -ENOMEM;
+	}
+	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
+
+	numa_distance = __va(phys);
+	numa_distance_cnt = cnt;
+
+	/* fill with the default distances */
+	for (i = 0; i < cnt; i++)
+		for (j = 0; j < cnt; j++)
+			numa_distance[i * cnt + j] = i == j ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
+
+	return 0;
+}
+
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA node to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to'  node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance.  If distance table
+ * doesn't exist, one which is large enough to accommodate all the currently
+ * known nodes will be created.
+ *
+ * If such table cannot be allocated, a warning is printed and further
+ * calls are ignored until the distance table is reset with
+ * numa_reset_distance().
+ *
+ * If @from or @to is higher than the highest known node at the time of
+ * table creation or @distance doesn't make sense, the call is ignored.
+ * This is to allow simplification of specific NUMA config implementations.
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+	if (!numa_distance && numa_alloc_distance() < 0)
+		return;
+
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+			    from, to, distance);
+		return;
+	}
+
+	if ((u8)distance != distance ||
+	    (from == to && distance != LOCAL_DISTANCE)) {
+		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+			     from, to, distance);
+		return;
+	}
+
+	numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+int __node_distance(int from, int to)
+{
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common).  Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+{
+	u64 numaram, e820ram;
+	int i;
+
+	numaram = 0;
+	for (i = 0; i < mi->nr_blks; i++) {
+		u64 s = mi->blk[i].start >> PAGE_SHIFT;
+		u64 e = mi->blk[i].end >> PAGE_SHIFT;
+		numaram += e - s;
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+		if ((s64)numaram < 0)
+			numaram = 0;
+	}
+
+	e820ram = max_pfn - (memblock_x86_hole_size(0,
+					PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+		printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
+		       (numaram << PAGE_SHIFT) >> 20,
+		       (e820ram << PAGE_SHIFT) >> 20);
+		return false;
+	}
+	return true;
+}
+
+static int __init numa_register_memblks(struct numa_meminfo *mi)
+{
+	int i, nid;
+
+	/* Account for nodes with cpus and no memory */
+	node_possible_map = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&node_possible_map, mi);
+	if (WARN_ON(nodes_empty(node_possible_map)))
+		return -EINVAL;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		memblock_x86_register_active_regions(mi->blk[i].nid,
+					mi->blk[i].start >> PAGE_SHIFT,
+					mi->blk[i].end >> PAGE_SHIFT);
+
+	/* for out of order entries */
+	sort_node_map();
+	if (!numa_meminfo_cover_memory(mi))
+		return -EINVAL;
+
+	/* Finally register nodes. */
+	for_each_node_mask(nid, node_possible_map) {
+		u64 start = PFN_PHYS(max_pfn);
+		u64 end = 0;
+
+		for (i = 0; i < mi->nr_blks; i++) {
+			if (nid != mi->blk[i].nid)
+				continue;
+			start = min(mi->blk[i].start, start);
+			end = max(mi->blk[i].end, end);
+		}
+
+		if (start < end)
+			setup_node_data(nid, start, end);
+	}
+
+	return 0;
+}
+
 /*
  * There are unfortunately some poorly designed mainboards around that
  * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void)
  * as the number of CPUs is not known yet. We round robin the existing
  * nodes.
  */
-void __init numa_init_array(void)
+static void __init numa_init_array(void)
 {
 	int rr, i;
 
@@ -117,6 +555,95 @@ void __init numa_init_array(void)
 	}
 }
 
+static int __init numa_init(int (*init_func)(void))
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		set_apicid_to_node(i, NUMA_NO_NODE);
+
+	nodes_clear(numa_nodes_parsed);
+	nodes_clear(node_possible_map);
+	nodes_clear(node_online_map);
+	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+	remove_all_active_ranges();
+	numa_reset_distance();
+
+	ret = init_func();
+	if (ret < 0)
+		return ret;
+	ret = numa_cleanup_meminfo(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	numa_emulation(&numa_meminfo, numa_distance_cnt);
+
+	ret = numa_register_memblks(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < nr_cpu_ids; i++) {
+		int nid = early_cpu_to_node(i);
+
+		if (nid == NUMA_NO_NODE)
+			continue;
+		if (!node_online(nid))
+			numa_clear_node(i);
+	}
+	numa_init_array();
+	return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node and add memory blocks that cover all
+ * allowed memory.  This function must not fail.
+ */
+static int __init dummy_numa_init(void)
+{
+	printk(KERN_INFO "%s\n",
+	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
+	printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
+	       0LLU, PFN_PHYS(max_pfn));
+
+	node_set(0, numa_nodes_parsed);
+	numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
+
+	return 0;
+}
+
+/**
+ * x86_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds.  The
+ * last fallback is dummy single node config encompassing whole memory and
+ * never fails.
+ */
+void __init x86_numa_init(void)
+{
+	if (!numa_off) {
+#ifdef CONFIG_X86_NUMAQ
+		if (!numa_init(numaq_numa_init))
+			return;
+#endif
+#ifdef CONFIG_ACPI_NUMA
+		if (!numa_init(x86_acpi_numa_init))
+			return;
+#endif
+#ifdef CONFIG_AMD_NUMA
+		if (!numa_init(amd_numa_init))
+			return;
+#endif
+	}
+
+	numa_init(dummy_numa_init);
+}
+
 static __init int find_near_online_node(int node)
 {
 	int n, val;
@@ -213,53 +740,48 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+void debug_cpumask_set_cpu(int cpu, int node, bool enable)
 {
-	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
 	char buf[64];
 
 	if (node == NUMA_NO_NODE) {
 		/* early_cpu_to_node() already emits a warning and trace */
-		return NULL;
+		return;
 	}
 	mask = node_to_cpumask_map[node];
 	if (!mask) {
 		pr_err("node_to_cpumask_map[%i] NULL\n", node);
 		dump_stack();
-		return NULL;
+		return;
 	}
 
+	if (enable)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_clear_cpu(cpu, mask);
+
 	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable ? "numa_add_cpu" : "numa_remove_cpu",
 		cpu, node, buf);
-	return mask;
+	return;
 }
 
 # ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
 {
-	struct cpumask *mask;
-
-	mask = debug_cpumask_set_cpu(cpu, enable);
-	if (!mask)
-		return;
-
-	if (enable)
-		cpumask_set_cpu(cpu, mask);
-	else
-		cpumask_clear_cpu(cpu, mask);
+	debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
 }
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 1);
+	numa_set_cpumask(cpu, true);
 }
 
 void __cpuinit numa_remove_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 0);
+	numa_set_cpumask(cpu, false);
 }
 # endif	/* !CONFIG_NUMA_EMU */
 
@@ -287,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node)
 EXPORT_SYMBOL(cpumask_of_node);
 
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 start)
+{
+	struct numa_meminfo *mi = &numa_meminfo;
+	int nid = mi->blk[0].nid;
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].start <= start && mi->blk[i].end > start)
+			nid = mi->blk[i].nid;
+	return nid;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420df..849a975d3fa0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,39 +22,11 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/nodemask.h>
 #include <linux/module.h>
-#include <linux/kexec.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/mmzone.h>
-#include <asm/bios_ebda.h>
-#include <asm/proto.h>
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-/*
- * numa interface - we expect the numa architecture specific code to have
- *                  populated the following initialisation.
- *
- * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) node_start_pfn   - the starting page frame number for a node
- * 3) node_end_pfn     - the ending page fram number for a node
- */
-unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
-unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 
+#include "numa_internal.h"
 
 #ifdef CONFIG_DISCONTIGMEM
 /*
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 }
 #endif
 
-extern unsigned long find_max_low_pfn(void);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
-int __cpuinit numa_cpu_node(int cpu)
-{
-	return apic->x86_32_numa_cpu_node(cpu);
-}
-
-/*
- * FLAT - support for basic PC memory model with discontig enabled, essentially
- *        a single node with all available processors in it with a flat
- *        memory map.
- */
-int __init get_memcfg_numa_flat(void)
-{
-	printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
-
-	node_start_pfn[0] = 0;
-	node_end_pfn[0] = max_pfn;
-	memblock_x86_register_active_regions(0, 0, max_pfn);
-	memory_present(0, 0, max_pfn);
-	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
-
-        /* Indicate there is one node available. */
-	nodes_clear(node_online_map);
-	node_set_online(0);
-	return 1;
-}
-
-/*
- * Find the highest page frame number we have available for the node
- */
-static void __init propagate_e820_map_node(int nid)
-{
-	if (node_end_pfn[nid] > max_pfn)
-		node_end_pfn[nid] = max_pfn;
-	/*
-	 * if a user has given mem=XXXX, then we need to make sure 
-	 * that the node _starts_ before that, too, not just ends
-	 */
-	if (node_start_pfn[nid] > max_pfn)
-		node_start_pfn[nid] = max_pfn;
-	BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
-}
-
-/* 
- * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
- * method.  For node zero take this from the bottom of memory, for
- * subsequent nodes place them at node_remap_start_vaddr which contains
- * node local data in physically node local memory.  See setup_memory()
- * for details.
- */
-static void __init allocate_pgdat(int nid)
-{
-	char buf[16];
-
-	if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
-		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
-	else {
-		unsigned long pgdat_phys;
-		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
-				 max_pfn_mapped<<PAGE_SHIFT,
-				 sizeof(pg_data_t),
-				 PAGE_SIZE);
-		NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-		memset(buf, 0, sizeof(buf));
-		sprintf(buf, "NODE_DATA %d",  nid);
-		memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
-	}
-	printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
-		nid, (unsigned long)NODE_DATA(nid));
-}
-
 /*
- * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
- * virtual address space (KVA) is reserved and portions of nodes are mapped
- * using it. This is to allow node-local memory to be allocated for
- * structures that would normally require ZONE_NORMAL. The memory is
- * allocated with alloc_remap() and callers should be prepared to allocate
- * from the bootmem allocator instead.
+ * Remap memory allocator
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 
+/**
+ * alloc_remap - Allocate remapped memory
+ * @nid: NUMA node to allocate memory from
+ * @size: The size of allocation
+ *
+ * Allocate @size bytes from the remap area of NUMA node @nid.  The
+ * size of the remap area is predetermined by init_alloc_remap() and
+ * only the callers considered there should call this function.  For
+ * more info, please read the comment on top of init_alloc_remap().
+ *
+ * The caller must be ready to handle allocation failure from this
+ * function and fall back to regular memory allocator in such cases.
+ *
+ * CONTEXT:
+ * Single CPU early boot context.
+ *
+ * RETURNS:
+ * Pointer to the allocated memory on success, %NULL on failure.
+ */
 void *alloc_remap(int nid, unsigned long size)
 {
 	void *allocation = node_remap_alloc_vaddr[nid];
 
 	size = ALIGN(size, L1_CACHE_BYTES);
 
-	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+	if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
 		return NULL;
 
 	node_remap_alloc_vaddr[nid] += size;
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-static void __init remap_numa_kva(void)
-{
-	void *vaddr;
-	unsigned long pfn;
-	int node;
-
-	for_each_online_node(node) {
-		printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
-		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
-			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
-			printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
-				(unsigned long)vaddr,
-				node_remap_start_pfn[node] + pfn);
-			set_pmd_pfn((ulong) vaddr, 
-				node_remap_start_pfn[node] + pfn, 
-				PAGE_KERNEL_LARGE);
-		}
-	}
-}
-
 #ifdef CONFIG_HIBERNATION
 /**
  * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 	int node;
 
 	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, size, pfn;
+		unsigned long start_va, start_pfn, nr_pages, pfn;
 
 		start_va = (unsigned long)node_remap_start_vaddr[node];
 		start_pfn = node_remap_start_pfn[node];
-		size = node_remap_size[node];
+		nr_pages = (node_remap_end_vaddr[node] -
+			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
 
 		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
-		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
 			pgd_t *pgd = pgd_base + pgd_index(vaddr);
 			pud_t *pud = pud_offset(pgd, vaddr);
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long calculate_numa_remap_pages(void)
+/**
+ * init_alloc_remap - Initialize remap allocator for a NUMA node
+ * @nid: NUMA node to initialize remap allocator for
+ *
+ * NUMA nodes may end up without any lowmem.  As allocating pgdat and
+ * memmap on a different node with lowmem is inefficient, a special
+ * remap allocator is implemented which can be used by alloc_remap().
+ *
+ * For each node, the amount of memory which will be necessary for
+ * pgdat and memmap is calculated and two memory areas of the size are
+ * allocated - one in the node and the other in lowmem; then, the area
+ * in the node is remapped to the lowmem area.
+ *
+ * As pgdat and memmap must be allocated in lowmem anyway, this
+ * doesn't waste lowmem address space; however, the actual lowmem
+ * which gets remapped over is wasted.  The amount shouldn't be
+ * problematic on machines where this feature will be used.
+ *
+ * Initialization failure isn't fatal.  alloc_remap() is used
+ * opportunistically and the callers will fall back to other memory
+ * allocation mechanisms on failure.
+ */
+void __init init_alloc_remap(int nid, u64 start, u64 end)
 {
-	int nid;
-	unsigned long size, reserve_pages = 0;
-
-	for_each_online_node(nid) {
-		u64 node_kva_target;
-		u64 node_kva_final;
-
-		/*
-		 * The acpi/srat node info can show hot-add memroy zones
-		 * where memory could be added but not currently present.
-		 */
-		printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-			nid, node_start_pfn[nid], node_end_pfn[nid]);
-		if (node_start_pfn[nid] > max_pfn)
-			continue;
-		if (!node_end_pfn[nid])
-			continue;
-		if (node_end_pfn[nid] > max_pfn)
-			node_end_pfn[nid] = max_pfn;
-
-		/* ensure the remap includes space for the pgdat. */
-		size = node_remap_size[nid] + sizeof(pg_data_t);
-
-		/* convert size to large (pmd size) pages, rounding up */
-		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-		/* now the roundup is correct, convert to PAGE_SIZE pages */
-		size = size * PTRS_PER_PTE;
-
-		node_kva_target = round_down(node_end_pfn[nid] - size,
-						 PTRS_PER_PTE);
-		node_kva_target <<= PAGE_SHIFT;
-		do {
-			node_kva_final = memblock_find_in_range(node_kva_target,
-					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-						((u64)size)<<PAGE_SHIFT,
-						LARGE_PAGE_BYTES);
-			node_kva_target -= LARGE_PAGE_BYTES;
-		} while (node_kva_final == MEMBLOCK_ERROR &&
-			 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
-
-		if (node_kva_final == MEMBLOCK_ERROR)
-			panic("Can not get kva ram\n");
-
-		node_remap_size[nid] = size;
-		node_remap_offset[nid] = reserve_pages;
-		reserve_pages += size;
-		printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
-				  " node %d at %llx\n",
-				size, nid, node_kva_final>>PAGE_SHIFT);
-
-		/*
-		 *  prevent kva address below max_low_pfn want it on system
-		 *  with less memory later.
-		 *  layout will be: KVA address , KVA RAM
-		 *
-		 *  we are supposed to only record the one less then max_low_pfn
-		 *  but we could have some hole in high memory, and it will only
-		 *  check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
-		 *  to use it as free.
-		 *  So memblock_x86_reserve_range here, hope we don't run out of that array
-		 */
-		memblock_x86_reserve_range(node_kva_final,
-			      node_kva_final+(((u64)size)<<PAGE_SHIFT),
-			      "KVA RAM");
-
-		node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
-	}
-	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-			reserve_pages);
-	return reserve_pages;
-}
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = end >> PAGE_SHIFT;
+	unsigned long size, pfn;
+	u64 node_pa, remap_pa;
+	void *remap_va;
 
-static void init_remap_allocator(int nid)
-{
-	node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			kva_start_pfn + node_remap_offset[nid]);
-	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-		(node_remap_size[nid] * PAGE_SIZE);
-	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-		ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-	printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
-		(ulong) node_remap_start_vaddr[nid],
-		(ulong) node_remap_end_vaddr[nid]);
+	/*
+	 * The acpi/srat node info can show hot-add memory zones where
+	 * memory could be added but not currently present.
+	 */
+	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
+	       nid, start_pfn, end_pfn);
+
+	/* calculate the necessary space aligned to large page size */
+	size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
+	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+	size = ALIGN(size, LARGE_PAGE_BYTES);
+
+	/* allocate node memory and the lowmem remap area */
+	node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
+	if (node_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
+			   size, nid);
+		return;
+	}
+	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+					  max_low_pfn << PAGE_SHIFT,
+					  size, LARGE_PAGE_BYTES);
+	if (remap_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+			   size, nid);
+		memblock_x86_free_range(node_pa, node_pa + size);
+		return;
+	}
+	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	remap_va = phys_to_virt(remap_pa);
+
+	/* perform actual remap */
+	for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
+		set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
+			    (node_pa >> PAGE_SHIFT) + pfn,
+			    PAGE_KERNEL_LARGE);
+
+	/* initialize remap allocator parameters */
+	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+	node_remap_start_vaddr[nid] = remap_va;
+	node_remap_end_vaddr[nid] = remap_va + size;
+	node_remap_alloc_vaddr[nid] = remap_va;
+
+	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
 void __init initmem_init(void)
 {
-	int nid;
-	long kva_target_pfn;
-
-	/*
-	 * When mapping a NUMA machine we allocate the node_mem_map arrays
-	 * from node local memory.  They are then mapped directly into KVA
-	 * between zone normal and vmalloc space.  Calculate the size of
-	 * this space and use it to adjust the boundary between ZONE_NORMAL
-	 * and ZONE_HIGHMEM.
-	 */
-
-	get_memcfg_numa();
-	numa_init_array();
-
-	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	x86_numa_init();
 
-	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-	do {
-		kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
-					max_low_pfn<<PAGE_SHIFT,
-					kva_pages<<PAGE_SHIFT,
-					PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
-		kva_target_pfn -= PTRS_PER_PTE;
-	} while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
-
-	if (kva_start_pfn == MEMBLOCK_ERROR)
-		panic("Can not get kva space\n");
-
-	printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
-		kva_start_pfn, max_low_pfn);
-	printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
-
-	/* avoid clash with initrd */
-	memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
-		      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
-		     "KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
@@ -409,51 +257,9 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid) {
-		init_remap_allocator(nid);
-
-		allocate_pgdat(nid);
-	}
-	remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-	for_each_online_node(nid)
-		propagate_e820_map_node(nid);
-
-	for_each_online_node(nid) {
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-		NODE_DATA(nid)->node_id = nid;
-	}
 
 	setup_bootmem_allocator();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int paddr_to_nid(u64 addr)
-{
-	int nid;
-	unsigned long pfn = PFN_DOWN(addr);
-
-	for_each_node(nid)
-		if (node_start_pfn[nid] <= pfn &&
-		    pfn < node_end_pfn[nid])
-			return nid;
-
-	return -1;
-}
-
-/*
- * This function is used to ask node id BEFORE memmap and mem_section's
- * initialization (pfn_to_nid() can't be used yet).
- * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	int nid = paddr_to_nid(addr);
-	return (nid >= 0) ? nid : 0;
-}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc72033..dd27f401f0a0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -2,646 +2,13 @@
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <linux/sched.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/dma.h>
-#include <asm/acpi.h>
-#include <asm/amd_nb.h>
 
 #include "numa_internal.h"
 
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-nodemask_t numa_nodes_parsed __initdata;
-
-struct memnode memnode;
-
-static unsigned long __initdata nodemap_addr;
-static unsigned long __initdata nodemap_size;
-
-static struct numa_meminfo numa_meminfo __initdata;
-
-static int numa_distance_cnt;
-static u8 *numa_distance;
-
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
-{
-	unsigned long addr, end;
-	int i, res = -1;
-
-	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
-	for (i = 0; i < mi->nr_blks; i++) {
-		addr = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (addr >= end)
-			continue;
-		if ((end >> shift) >= memnodemapsize)
-			return 0;
-		do {
-			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
-				return -1;
-			memnodemap[addr >> shift] = mi->blk[i].nid;
-			addr += (1UL << shift);
-		} while (addr < end);
-		res = 1;
-	}
-	return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-	unsigned long addr;
-
-	memnodemap = memnode.embedded_map;
-	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
-		return 0;
-
-	addr = 0x8000;
-	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
-	nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
-				      nodemap_size, L1_CACHE_BYTES);
-	if (nodemap_addr == MEMBLOCK_ERROR) {
-		printk(KERN_ERR
-		       "NUMA: Unable to allocate Memory to Node hash map\n");
-		nodemap_addr = nodemap_size = 0;
-		return -1;
-	}
-	memnodemap = phys_to_virt(nodemap_addr);
-	memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
-
-	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-	       nodemap_addr, nodemap_addr + nodemap_size);
-	return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
-{
-	int i, nodes_used = 0;
-	unsigned long start, end;
-	unsigned long bitfield = 0, memtop = 0;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		start = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (start >= end)
-			continue;
-		bitfield |= start;
-		nodes_used++;
-		if (end > memtop)
-			memtop = end;
-	}
-	if (nodes_used <= 1)
-		i = 63;
-	else
-		i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
-	memnodemapsize = (memtop >> i)+1;
-	return i;
-}
-
-static int __init compute_hash_shift(const struct numa_meminfo *mi)
-{
-	int shift;
-
-	shift = extract_lsb_from_nodes(mi);
-	if (allocate_cachealigned_memnodemap())
-		return -1;
-	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
-		shift);
-
-	if (populate_memnodemap(mi, shift) != 1) {
-		printk(KERN_INFO "Your memory is not aligned you need to "
-		       "rebuild your kernel with a bigger NODEMAPSIZE "
-		       "shift=%d\n", shift);
-		return -1;
-	}
-	return shift;
-}
-
-int __meminit  __early_pfn_to_nid(unsigned long pfn)
-{
-	return phys_to_nid(pfn << PAGE_SHIFT);
-}
-
-static void * __init early_node_mem(int nodeid, unsigned long start,
-				    unsigned long end, unsigned long size,
-				    unsigned long align)
-{
-	unsigned long mem;
-
-	/*
-	 * put it on high as possible
-	 * something will go with NODE_DATA
-	 */
-	if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
-		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
-	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
-		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	/* extend the search scope */
-	end = max_pfn_mapped << PAGE_SHIFT;
-	start = MAX_DMA_PFN << PAGE_SHIFT;
-	mem = memblock_find_in_range(start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
-		       size, nodeid);
-
-	return NULL;
-}
-
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
-				     struct numa_meminfo *mi)
-{
-	/* ignore zero length blks */
-	if (start == end)
-		return 0;
-
-	/* whine about and ignore invalid blks */
-	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
-		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
-			   nid, start, end);
-		return 0;
-	}
-
-	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
-		pr_err("NUMA: too many memblk ranges\n");
-		return -EINVAL;
-	}
-
-	mi->blk[mi->nr_blks].start = start;
-	mi->blk[mi->nr_blks].end = end;
-	mi->blk[mi->nr_blks].nid = nid;
-	mi->nr_blks++;
-	return 0;
-}
-
-/**
- * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
- * @idx: Index of memblk to remove
- * @mi: numa_meminfo to remove memblk from
- *
- * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
- * decrementing @mi->nr_blks.
- */
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
-{
-	mi->nr_blks--;
-	memmove(&mi->blk[idx], &mi->blk[idx + 1],
-		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
-/* Initialize bootmem allocator for a node */
-void __init
-setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{
-	unsigned long start_pfn, last_pfn, nodedata_phys;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	int nid;
-
-	if (!end)
-		return;
-
-	/*
-	 * Don't confuse VM with a node that doesn't have the
-	 * minimum amount of memory:
-	 */
-	if (end && (end - start) < NODE_MIN_SIZE)
-		return;
-
-	start = roundup(start, ZONE_ALIGN);
-
-	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
-	       start, end);
-
-	start_pfn = start >> PAGE_SHIFT;
-	last_pfn = end >> PAGE_SHIFT;
-
-	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
-					   SMP_CACHE_BYTES);
-	if (node_data[nodeid] == NULL)
-		return;
-	nodedata_phys = __pa(node_data[nodeid]);
-	memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
-	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
-		nodedata_phys + pgdat_size - 1);
-	nid = phys_to_nid(nodedata_phys);
-	if (nid != nodeid)
-		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
-
-	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->node_id = nodeid;
-	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
-
-	node_set_online(nodeid);
-}
-
-/**
- * numa_cleanup_meminfo - Cleanup a numa_meminfo
- * @mi: numa_meminfo to clean up
- *
- * Sanitize @mi by merging and removing unncessary memblks.  Also check for
- * conflicts and clear unused memblks.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
-{
-	const u64 low = 0;
-	const u64 high = (u64)max_pfn << PAGE_SHIFT;
-	int i, j, k;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		struct numa_memblk *bi = &mi->blk[i];
-
-		/* make sure all blocks are inside the limits */
-		bi->start = max(bi->start, low);
-		bi->end = min(bi->end, high);
-
-		/* and there's no empty block */
-		if (bi->start == bi->end) {
-			numa_remove_memblk_from(i--, mi);
-			continue;
-		}
-
-		for (j = i + 1; j < mi->nr_blks; j++) {
-			struct numa_memblk *bj = &mi->blk[j];
-			unsigned long start, end;
-
-			/*
-			 * See whether there are overlapping blocks.  Whine
-			 * about but allow overlaps of the same nid.  They
-			 * will be merged below.
-			 */
-			if (bi->end > bj->start && bi->start < bj->end) {
-				if (bi->nid != bj->nid) {
-					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
-					       bi->nid, bi->start, bi->end,
-					       bj->nid, bj->start, bj->end);
-					return -EINVAL;
-				}
-				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
-					   bi->nid, bi->start, bi->end,
-					   bj->start, bj->end);
-			}
-
-			/*
-			 * Join together blocks on the same node, holes
-			 * between which don't overlap with memory on other
-			 * nodes.
-			 */
-			if (bi->nid != bj->nid)
-				continue;
-			start = max(min(bi->start, bj->start), low);
-			end = min(max(bi->end, bj->end), high);
-			for (k = 0; k < mi->nr_blks; k++) {
-				struct numa_memblk *bk = &mi->blk[k];
-
-				if (bi->nid == bk->nid)
-					continue;
-				if (start < bk->end && end > bk->start)
-					break;
-			}
-			if (k < mi->nr_blks)
-				continue;
-			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
-			       bi->nid, bi->start, bi->end, bj->start, bj->end,
-			       start, end);
-			bi->start = start;
-			bi->end = end;
-			numa_remove_memblk_from(j--, mi);
-		}
-	}
-
-	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
-		mi->blk[i].start = mi->blk[i].end = 0;
-		mi->blk[i].nid = NUMA_NO_NODE;
-	}
-
-	return 0;
-}
-
-/*
- * Set nodes, which have memory in @mi, in *@nodemask.
- */
-static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
-					      const struct numa_meminfo *mi)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
-		if (mi->blk[i].start != mi->blk[i].end &&
-		    mi->blk[i].nid != NUMA_NO_NODE)
-			node_set(mi->blk[i].nid, *nodemask);
-}
-
-/**
- * numa_reset_distance - Reset NUMA distance table
- *
- * The current table is freed.  The next numa_set_distance() call will
- * create a new one.
- */
-void __init numa_reset_distance(void)
-{
-	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
-
-	/* numa_distance could be 1LU marking allocation failure, test cnt */
-	if (numa_distance_cnt)
-		memblock_x86_free_range(__pa(numa_distance),
-					__pa(numa_distance) + size);
-	numa_distance_cnt = 0;
-	numa_distance = NULL;	/* enable table creation */
-}
-
-static int __init numa_alloc_distance(void)
-{
-	nodemask_t nodes_parsed;
-	size_t size;
-	int i, j, cnt = 0;
-	u64 phys;
-
-	/* size the new table and allocate it */
-	nodes_parsed = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
-
-	for_each_node_mask(i, nodes_parsed)
-		cnt = i;
-	cnt++;
-	size = cnt * cnt * sizeof(numa_distance[0]);
-
-	phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
-				      size, PAGE_SIZE);
-	if (phys == MEMBLOCK_ERROR) {
-		pr_warning("NUMA: Warning: can't allocate distance table!\n");
-		/* don't retry until explicitly reset */
-		numa_distance = (void *)1LU;
-		return -ENOMEM;
-	}
-	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
-
-	numa_distance = __va(phys);
-	numa_distance_cnt = cnt;
-
-	/* fill with the default distances */
-	for (i = 0; i < cnt; i++)
-		for (j = 0; j < cnt; j++)
-			numa_distance[i * cnt + j] = i == j ?
-				LOCAL_DISTANCE : REMOTE_DISTANCE;
-	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
-
-	return 0;
-}
-
-/**
- * numa_set_distance - Set NUMA distance from one NUMA to another
- * @from: the 'from' node to set distance
- * @to: the 'to'  node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance.  If distance table
- * doesn't exist, one which is large enough to accommodate all the currently
- * known nodes will be created.
- *
- * If such table cannot be allocated, a warning is printed and further
- * calls are ignored until the distance table is reset with
- * numa_reset_distance().
- *
- * If @from or @to is higher than the highest known node at the time of
- * table creation or @distance doesn't make sense, the call is ignored.
- * This is to allow simplification of specific NUMA config implementations.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
-	if (!numa_distance && numa_alloc_distance() < 0)
-		return;
-
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
-		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
-			    from, to, distance);
-		return;
-	}
-
-	if ((u8)distance != distance ||
-	    (from == to && distance != LOCAL_DISTANCE)) {
-		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
-			     from, to, distance);
-		return;
-	}
-
-	numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-int __node_distance(int from, int to)
-{
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
-		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
-	return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
-/*
- * Sanity check to catch more bad NUMA configurations (they are amazingly
- * common).  Make sure the nodes cover all memory.
- */
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
-{
-	unsigned long numaram, e820ram;
-	int i;
-
-	numaram = 0;
-	for (i = 0; i < mi->nr_blks; i++) {
-		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
-		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
-		numaram += e - s;
-		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
-		if ((long)numaram < 0)
-			numaram = 0;
-	}
-
-	e820ram = max_pfn - (memblock_x86_hole_size(0,
-					max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
-	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
-	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
-		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
-		       (numaram << PAGE_SHIFT) >> 20,
-		       (e820ram << PAGE_SHIFT) >> 20);
-		return false;
-	}
-	return true;
-}
-
-static int __init numa_register_memblks(struct numa_meminfo *mi)
-{
-	int i, nid;
-
-	/* Account for nodes with cpus and no memory */
-	node_possible_map = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&node_possible_map, mi);
-	if (WARN_ON(nodes_empty(node_possible_map)))
-		return -EINVAL;
-
-	memnode_shift = compute_hash_shift(mi);
-	if (memnode_shift < 0) {
-		printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
-		return -EINVAL;
-	}
-
-	for (i = 0; i < mi->nr_blks; i++)
-		memblock_x86_register_active_regions(mi->blk[i].nid,
-					mi->blk[i].start >> PAGE_SHIFT,
-					mi->blk[i].end >> PAGE_SHIFT);
-
-	/* for out of order entries */
-	sort_node_map();
-	if (!numa_meminfo_cover_memory(mi))
-		return -EINVAL;
-
-	/* Finally register nodes. */
-	for_each_node_mask(nid, node_possible_map) {
-		u64 start = (u64)max_pfn << PAGE_SHIFT;
-		u64 end = 0;
-
-		for (i = 0; i < mi->nr_blks; i++) {
-			if (nid != mi->blk[i].nid)
-				continue;
-			start = min(mi->blk[i].start, start);
-			end = max(mi->blk[i].end, end);
-		}
-
-		if (start < end)
-			setup_node_bootmem(nid, start, end);
-	}
-
-	return 0;
-}
-
-/**
- * dummy_numma_init - Fallback dummy NUMA init
- *
- * Used if there's no underlying NUMA architecture, NUMA initialization
- * fails, or NUMA is disabled on the command line.
- *
- * Must online at least one node and add memory blocks that cover all
- * allowed memory.  This function must not fail.
- */
-static int __init dummy_numa_init(void)
-{
-	printk(KERN_INFO "%s\n",
-	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
-	       0LU, max_pfn << PAGE_SHIFT);
-
-	node_set(0, numa_nodes_parsed);
-	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-
-	return 0;
-}
-
-static int __init numa_init(int (*init_func)(void))
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, NUMA_NO_NODE);
-
-	nodes_clear(numa_nodes_parsed);
-	nodes_clear(node_possible_map);
-	nodes_clear(node_online_map);
-	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-	remove_all_active_ranges();
-	numa_reset_distance();
-
-	ret = init_func();
-	if (ret < 0)
-		return ret;
-	ret = numa_cleanup_meminfo(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	numa_emulation(&numa_meminfo, numa_distance_cnt);
-
-	ret = numa_register_memblks(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		int nid = early_cpu_to_node(i);
-
-		if (nid == NUMA_NO_NODE)
-			continue;
-		if (!node_online(nid))
-			numa_clear_node(i);
-	}
-	numa_init_array();
-	return 0;
-}
-
 void __init initmem_init(void)
 {
-	int ret;
-
-	if (!numa_off) {
-#ifdef CONFIG_ACPI_NUMA
-		ret = numa_init(x86_acpi_numa_init);
-		if (!ret)
-			return;
-#endif
-#ifdef CONFIG_AMD_NUMA
-		ret = numa_init(amd_numa_init);
-		if (!ret)
-			return;
-#endif
-	}
-
-	numa_init(dummy_numa_init);
+	x86_numa_init();
 }
 
 unsigned long __init numa_free_all_bootmem(void)
@@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void)
 
 	return pages;
 }
-
-int __cpuinit numa_cpu_node(int cpu)
-{
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index ad091e4cff17..d0ed086b6247 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 #include <asm/dma.h>
 
 #include "numa_internal.h"
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		nr_nodes = MAX_NUMNODES;
 	}
 
-	size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+	/*
+	 * Calculate target node size.  x86_32 freaks on __udivdi3() so do
+	 * the division in ulong number of pages and convert back.
+	 */
+	size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
+	size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
+
 	/*
 	 * Calculate the number of big nodes that can be allocated as a result
 	 * of consolidating the remainder.
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
 			int phys_blk;
 
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 {
 	static struct numa_meminfo ei __initdata;
 	static struct numa_meminfo pi __initdata;
-	const u64 max_addr = max_pfn << PAGE_SHIFT;
+	const u64 max_addr = PFN_PHYS(max_pfn);
 	u8 *phys_dist = NULL;
 	size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
 	int max_emu_nid, dfl_phys_nid;
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	if (numa_dist_cnt) {
 		u64 phys;
 
-		phys = memblock_find_in_range(0,
-					      (u64)max_pfn_mapped << PAGE_SHIFT,
+		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
 		if (phys == MEMBLOCK_ERROR) {
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
@@ -454,10 +460,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
 }
 #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
 {
-	struct cpumask *mask;
-	int nid, physnid, i;
+	int nid, physnid;
 
 	nid = early_cpu_to_node(cpu);
 	if (nid == NUMA_NO_NODE) {
@@ -467,28 +472,21 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 
 	physnid = emu_nid_to_phys[nid];
 
-	for_each_online_node(i) {
+	for_each_online_node(nid) {
 		if (emu_nid_to_phys[nid] != physnid)
 			continue;
 
-		mask = debug_cpumask_set_cpu(cpu, enable);
-		if (!mask)
-			return;
-
-		if (enable)
-			cpumask_set_cpu(cpu, mask);
-		else
-			cpumask_clear_cpu(cpu, mask);
+		debug_cpumask_set_cpu(cpu, nid, enable);
 	}
 }
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 1);
+	numa_set_cpumask(cpu, true);
 }
 
 void __cpuinit numa_remove_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 0);
+	numa_set_cpumask(cpu, false);
 }
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index ef2d97377d7c..7178c3afe05e 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
 int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
 void __init numa_reset_distance(void);
 
+void __init x86_numa_init(void);
+
+#ifdef CONFIG_X86_64
+static inline void init_alloc_remap(int nid, u64 start, u64 end)	{ }
+#else
+void __init init_alloc_remap(int nid, u64 start, u64 end);
+#endif
+
 #ifdef CONFIG_NUMA_EMU
 void __init numa_emulation(struct numa_meminfo *numa_meminfo,
 			   int numa_dist_cnt);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c
index 8e9d3394f6d4..81dbfdeb080d 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat.c
@@ -26,8 +26,6 @@
 
 int acpi_numa __initdata;
 
-static struct bootnode nodes_add[MAX_NUMNODES];
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -37,7 +35,6 @@ static __init void bad_srat(void)
 {
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
-/*
- * Update nodes_add[]
- * This code supports one contiguous hot add area per node
- */
-static void __init
-update_nodes_add(int node, unsigned long start, unsigned long end)
-{
-	unsigned long s_pfn = start >> PAGE_SHIFT;
-	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int changed = 0;
-	struct bootnode *nd = &nodes_add[node];
-
-	/* I had some trouble with strange memory hotadd regions breaking
-	   the boot. Be very strict here and reject anything unexpected.
-	   If you want working memory hotadd write correct SRATs.
-
-	   The node size check is a basic sanity check to guard against
-	   mistakes */
-	if ((signed long)(end - start) < NODE_MIN_SIZE) {
-		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return;
-	}
-
-	/* This check might be a bit too strict, but I'm keeping it for now. */
-	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
-		printk(KERN_ERR
-			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
-			s_pfn, e_pfn);
-		return;
-	}
-
-	/* Looks good */
-
-	if (nd->start == nd->end) {
-		nd->start = start;
-		nd->end = end;
-		changed = 1;
-	} else {
-		if (nd->start == end) {
-			nd->start = start;
-			changed = 1;
-		}
-		if (nd->end == start) {
-			nd->end = end;
-			changed = 1;
-		}
-		if (!changed)
-			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
-	}
-
-	if (changed) {
-		node_set(node, numa_nodes_parsed);
-		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
-				 nd->start, nd->end);
-	}
-}
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	unsigned long start, end;
+	u64 start, end;
 	int node, pxm;
 
 	if (srat_disabled())
@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		return;
 	}
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       start, end);
-
-	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
-		update_nodes_add(node, start, end);
 }
 
 void __init acpi_numa_arch_fixup(void) {}
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void)
 		return ret;
 	return srat_disabled() ? -EINVAL : 0;
 }
-
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	int i, ret = 0;
-
-	for_each_node(i)
-		if (nodes_add[i].start <= start && nodes_add[i].end > start)
-			ret = i;
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
deleted file mode 100644
index 364f36bdfad8..000000000000
--- a/arch/x86/mm/srat_32.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit 
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.          
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/acpi.h>
-#include <linux/nodemask.h>
-#include <asm/srat.h>
-#include <asm/topology.h>
-#include <asm/smp.h>
-#include <asm/e820.h>
-
-/*
- * proximity macros and definitions
- */
-#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
-#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
-#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
-#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-/* bitmap length; _PXM is at most 255 */
-#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
-static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
-
-#define MAX_CHUNKS_PER_NODE	3
-#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
-struct node_memory_chunk_s {
-	unsigned long	start_pfn;
-	unsigned long	end_pfn;
-	u8	pxm;		// proximity domain of node
-	u8	nid;		// which cnode contains this chunk?
-	u8	bank;		// which mem bank on this node
-};
-static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
-
-static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
-
-int acpi_numa __initdata;
-
-static __init void bad_srat(void)
-{
-        printk(KERN_ERR "SRAT: SRAT not used.\n");
-        acpi_numa = -1;
-	num_memory_chunks = 0;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return numa_off || acpi_numa < 0;
-}
-
-/* Identify CPU proximity domains */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
-{
-	if (srat_disabled())
-		return;
-	if (cpu_affinity->header.length !=
-	     sizeof(struct acpi_srat_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;		/* empty entry */
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
-
-	/* don't need to check apic_id here, because it is always 8 bits */
-	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
-
-	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
-		cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
-}
-
-/*
- * Identify memory proximity domains and hot-remove capabilities.
- * Fill node memory chunk list structure.
- */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
-{
-	unsigned long long paddr, size;
-	unsigned long start_pfn, end_pfn;
-	u8 pxm;
-	struct node_memory_chunk_s *p, *q, *pend;
-
-	if (srat_disabled())
-		return;
-	if (memory_affinity->header.length !=
-	     sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		return;		/* empty entry */
-
-	pxm = memory_affinity->proximity_domain & 0xff;
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, pxm);
-
-	/* calculate info for memory chunk structure */
-	paddr = memory_affinity->base_address;
-	size = memory_affinity->length;
-
-	start_pfn = paddr >> PAGE_SHIFT;
-	end_pfn = (paddr + size) >> PAGE_SHIFT;
-
-
-	if (num_memory_chunks >= MAXCHUNKS) {
-		printk(KERN_WARNING "Too many mem chunks in SRAT."
-			" Ignoring %lld MBytes at %llx\n",
-			size/(1024*1024), paddr);
-		return;
-	}
-
-	/* Insertion sort based on base address */
-	pend = &node_memory_chunk[num_memory_chunks];
-	for (p = &node_memory_chunk[0]; p < pend; p++) {
-		if (start_pfn < p->start_pfn)
-			break;
-	}
-	if (p < pend) {
-		for (q = pend; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_pfn = start_pfn;
-	p->end_pfn = end_pfn;
-	p->pxm = pxm;
-
-	num_memory_chunks++;
-
-	printk(KERN_DEBUG "Memory range %08lx to %08lx"
-			  " in proximity domain %02x %s\n",
-		start_pfn, end_pfn,
-		pxm,
-		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
-		 "enabled and removable" : "enabled" ) );
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-}
-
-void acpi_numa_arch_fixup(void)
-{
-}
-/*
- * The SRAT table always lists ascending addresses, so can always
- * assume that the first "start" address that you see is the real
- * start of the node, and that the current "end" address is after
- * the previous one.
- */
-static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
-{
-	/*
-	 * Only add present memory as told by the e820.
-	 * There is no guarantee from the SRAT that the memory it
-	 * enumerates is present at boot time because it represents
-	 * *possible* memory hotplug areas the same as normal RAM.
-	 */
-	if (memory_chunk->start_pfn >= max_pfn) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-			memory_chunk->start_pfn, memory_chunk->end_pfn);
-		return -1;
-	}
-	if (memory_chunk->nid != nid)
-		return -1;
-
-	if (!node_has_online_mem(nid))
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_start_pfn[nid] > memory_chunk->start_pfn)
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_end_pfn[nid] < memory_chunk->end_pfn)
-		node_end_pfn[nid] = memory_chunk->end_pfn;
-
-	return 0;
-}
-
-int __init get_memcfg_from_srat(void)
-{
-	int i, j, nid;
-
-	if (srat_disabled())
-		goto out_fail;
-
-	if (acpi_numa_init() < 0)
-		goto out_fail;
-
-	if (num_memory_chunks == 0) {
-		printk(KERN_DEBUG
-			 "could not find any ACPI SRAT memory areas.\n");
-		goto out_fail;
-	}
-
-	/* Calculate total number of nodes in system from PXM bitmap and create
-	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
-	 * to specify the range of _PXM values.)
-	 */
-	/*
-	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
-	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
-	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
-	 * approaches MAX_PXM_DOMAINS for i386.
-	 */
-	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
-		if (BMAP_TEST(pxm_bitmap, i)) {
-			int nid = acpi_map_pxm_to_node(i);
-			node_set_online(nid);
-		}
-	}
-	BUG_ON(num_online_nodes() == 0);
-
-	/* set cnode id in memory chunk structure */
-	for (i = 0; i < num_memory_chunks; i++)
-		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
-
-	printk(KERN_DEBUG "pxm bitmap: ");
-	for (i = 0; i < sizeof(pxm_bitmap); i++) {
-		printk(KERN_CONT "%02x ", pxm_bitmap[i]);
-	}
-	printk(KERN_CONT "\n");
-	printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
-			 num_online_nodes());
-	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
-			 num_memory_chunks);
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
-
-	for (j = 0; j < num_memory_chunks; j++){
-		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
-		printk(KERN_DEBUG
-			"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
-		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-		if (node_read_chunk(chunk->nid, chunk))
-			continue;
-
-		memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
-					     min(chunk->end_pfn, max_pfn));
-	}
-	/* for out of order entries in SRAT */
-	sort_node_map();
-
-	for_each_online_node(nid) {
-		unsigned long start = node_start_pfn[nid];
-		unsigned long end = min(node_end_pfn[nid], max_pfn);
-
-		memory_present(nid, start, end);
-		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
-	}
-	return 1;
-out_fail:
-	printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
-			" table\n");
-	return 0;
-}
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..a5b64ab4cd6e 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,17 +16,6 @@
 #include <asm/stacktrace.h>
 #include <linux/compat.h>
 
-static void backtrace_warning_symbol(void *data, char *msg,
-				     unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	/* Yes, we want all stacks */
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static struct stacktrace_ops backtrace_ops = {
-	.warning	= backtrace_warning,
-	.warning_symbol	= backtrace_warning_symbol,
 	.stack		= backtrace_stack,
 	.address	= backtrace_address,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e37b407a0ee8..8214724ce54d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+					       DOMID_SELF);
 		if (irq < 0)
 			goto error;
 		dev_dbg(&dev->dev,
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
 					       (type == PCI_CAP_ID_MSIX) ?
 					       "pcifront-msi-x" :
-					       "pcifront-msi");
+					       "pcifront-msi",
+						DOMID_SELF);
 		if (irq < 0)
 			goto free;
 		i++;
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		struct physdev_map_pirq map_irq;
+		domid_t domid;
+
+		domid = ret = xen_find_device_domain_owner(dev);
+		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
+		 * hence check ret value for < 0. */
+		if (ret < 0)
+			domid = DOMID_SELF;
 
 		memset(&map_irq, 0, sizeof(map_irq));
-		map_irq.domid = DOMID_SELF;
+		map_irq.domid = domid;
 		map_irq.type = MAP_PIRQ_TYPE_MSI;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
 		if (ret) {
-			dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+				 ret, domid);
 			goto out;
 		}
 
 		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
 					       map_irq.pirq, map_irq.index,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+						domid);
 		if (ret < 0)
 			goto out;
 	}
@@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void)
 	}
 }
 #endif
+
+#ifdef CONFIG_XEN_DOM0
+struct xen_device_domain_owner {
+	domid_t domain;
+	struct pci_dev *dev;
+	struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	list_for_each_entry(owner, &dev_domain_list, list) {
+		if (owner->dev == dev)
+			return owner;
+	}
+	return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+	int domain = -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (owner)
+		domain = owner->domain;
+	spin_unlock(&dev_domain_list_spinlock);
+	return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+	struct xen_device_domain_owner *owner;
+
+	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+	if (!owner)
+		return -ENOMEM;	/* allocation failure, not a missing device */
+
+	spin_lock(&dev_domain_list_spinlock);
+	if (find_device(dev)) {
+		spin_unlock(&dev_domain_list_spinlock);
+		kfree(owner);
+		return -EEXIST;
+	}
+	owner->domain = domain;
+	owner->dev = dev;
+	list_add_tail(&owner->list, &dev_domain_list);
+	spin_unlock(&dev_domain_list_spinlock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (!owner) {
+		spin_unlock(&dev_domain_list_spinlock);
+		return -ENODEV;
+	}
+	list_del(&owner->list);
+	spin_unlock(&dev_domain_list_spinlock);
+	kfree(owner);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+#endif
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index dc701ea58546..e70be38ce039 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -74,6 +74,7 @@
 				compatible = "intel,ce4100-pci", "pci";
 				device_type = "pci";
 				bus-range = <1 1>;
+				reg = <0x0800 0x0 0x0 0x0 0x0>;
 				ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
 
 				interrupt-parent = <&ioapic2>;
@@ -346,7 +347,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16800 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				usb@d,1 {
@@ -356,7 +357,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16900 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				sata@e,0 {
@@ -366,7 +367,7 @@
 						   "pciclass0106";
 
 					reg = <0x17000 0x0 0x0 0x0 0x0>;
-					interrupts = <23 3>;
+					interrupts = <23 1>;
 				};
 
 				flash@f,0 {
@@ -412,6 +413,7 @@
 				#address-cells = <2>;
 				#size-cells = <1>;
 				compatible = "isa";
+				reg = <0xf800 0x0 0x0 0x0 0x0>;
 				ranges = <1 0 0 0 0 0x100>;
 
 				rtc@70 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7c6258..b30aa26a8df2 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type,
 		       data_size, data);
 }
 
-static efi_status_t virt_efi_set_virtual_address_map(
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	return efi_call_virt4(set_virtual_address_map,
-			      memory_map_size, descriptor_size,
-			      descriptor_version, virtual_map);
-}
-
 static efi_status_t __init phys_efi_set_virtual_address_map(
 	unsigned long memory_map_size,
 	unsigned long descriptor_size,
@@ -468,11 +457,25 @@ void __init efi_init(void)
 #endif
 }
 
+void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
+{
+	u64 addr, npages;
+
+	addr = md->virt_addr;
+	npages = md->num_pages;
+
+	memrange_efi_to_native(&addr, &npages);
+
+	if (executable)
+		set_memory_x(addr, npages);
+	else
+		set_memory_nx(addr, npages);
+}
+
 static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
 	void *p;
-	u64 addr, npages;
 
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void)
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
-		addr = md->virt_addr;
-		npages = md->num_pages;
-		memrange_efi_to_native(&addr, &npages);
-		set_memory_x(addr, npages);
+		efi_set_executable(md, true);
 	}
 }
 
@@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void)
  */
 void __init efi_enter_virtual_mode(void)
 {
-	efi_memory_desc_t *md;
+	efi_memory_desc_t *md, *prev_md = NULL;
 	efi_status_t status;
 	unsigned long size;
 	u64 end, systab, addr, npages, end_pfn;
-	void *p, *va;
+	void *p, *va, *new_memmap = NULL;
+	int count = 0;
 
 	efi.systab = NULL;
+
+	/* Merge contiguous regions of the same type and attribute */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		u64 prev_size;
+		md = p;
+
+		if (!prev_md) {
+			prev_md = md;
+			continue;
+		}
+
+		if (prev_md->type != md->type ||
+		    prev_md->attribute != md->attribute) {
+			prev_md = md;
+			continue;
+		}
+
+		prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
+
+		if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
+			prev_md->num_pages += md->num_pages;
+			md->type = EFI_RESERVED_TYPE;
+			md->attribute = 0;
+			continue;
+		}
+		prev_md = md;
+	}
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
@@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void)
 			systab += md->virt_addr - md->phys_addr;
 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
 		}
+		new_memmap = krealloc(new_memmap,
+				      (count + 1) * memmap.desc_size,
+				      GFP_KERNEL);
+		memcpy(new_memmap + (count * memmap.desc_size), md,
+		       memmap.desc_size);
+		count++;
 	}
 
 	BUG_ON(!efi.systab);
 
 	status = phys_efi_set_virtual_address_map(
-		memmap.desc_size * memmap.nr_map,
+		memmap.desc_size * count,
 		memmap.desc_size,
 		memmap.desc_version,
-		memmap.phys_map);
+		(efi_memory_desc_t *)__pa(new_memmap));
 
 	if (status != EFI_SUCCESS) {
 		printk(KERN_ALERT "Unable to switch EFI into virtual mode "
@@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void)
 	efi.set_variable = virt_efi_set_variable;
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
-	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
+	efi.set_virtual_address_map = NULL;
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 	memmap.map = NULL;
+	kfree(new_memmap);
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a7ac3d..2649426a7905 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,22 +41,7 @@
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
-static void __init early_mapping_set_exec(unsigned long start,
-					  unsigned long end,
-					  int executable)
-{
-	unsigned long num_pages;
-
-	start &= PMD_MASK;
-	end = (end + PMD_SIZE - 1) & PMD_MASK;
-	num_pages = (end - start) >> PAGE_SHIFT;
-	if (executable)
-		set_memory_x((unsigned long)__va(start), num_pages);
-	else
-		set_memory_nx((unsigned long)__va(start), num_pages);
-}
-
-static void __init early_runtime_code_mapping_set_exec(int executable)
+static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
 	void *p;
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
-			unsigned long end;
-			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-			early_mapping_set_exec(md->phys_addr, end, executable);
-		}
+		if (md->type == EFI_RUNTIME_SERVICES_CODE)
+			efi_set_executable(md, executable);
 	}
 }
 
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
 
-	early_runtime_code_mapping_set_exec(1);
+	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void)
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
-	early_runtime_code_mapping_set_exec(0);
+	early_code_mapping_set_exec(0);
 }
 
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
@@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 		return ioremap(phys_addr, size);
 
 	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
-		return NULL;
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+		unsigned long top = last_map_pfn << PAGE_SHIFT;
+		efi_ioremap(top, size - (top - phys_addr), type);
+	}
 
 	return (void __iomem *)__va(phys_addr);
 }
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 5c0207bf959b..7000e74b3087 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -97,11 +97,11 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
 			pentry->freq_hz, pentry->irq);
 			if (!pentry->irq)
 				continue;
-			mp_irq.type = MP_IOAPIC;
+			mp_irq.type = MP_INTSRC;
 			mp_irq.irqtype = mp_INT;
 /* triggering mode edge bit 2-3, active high polarity bit 0-1 */
 			mp_irq.irqflag = 5;
-			mp_irq.srcbus = 0;
+			mp_irq.srcbus = MP_BUS_ISA;
 			mp_irq.srcbusirq = pentry->irq;	/* IRQ */
 			mp_irq.dstapic = MP_APIC_ALL;
 			mp_irq.dstirq = pentry->irq;
@@ -168,10 +168,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
 	for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
 		pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
 			totallen, (u32)pentry->phys_addr, pentry->irq);
-		mp_irq.type = MP_IOAPIC;
+		mp_irq.type = MP_INTSRC;
 		mp_irq.irqtype = mp_INT;
 		mp_irq.irqflag = 0xf;	/* level trigger and active low */
-		mp_irq.srcbus = 0;
+		mp_irq.srcbus = MP_BUS_ISA;
 		mp_irq.srcbusirq = pentry->irq;	/* IRQ */
 		mp_irq.dstapic = MP_APIC_ALL;
 		mp_irq.dstirq = pentry->irq;
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
 	return 0;
 }
 
-void __init mrst_time_init(void)
+static void __init mrst_time_init(void)
 {
 	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
 	switch (mrst_timer_options) {
@@ -216,7 +216,7 @@ void __init mrst_time_init(void)
 	apbt_time_init();
 }
 
-void __cpuinit mrst_arch_setup(void)
+static void __cpuinit mrst_arch_setup(void)
 {
 	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
 		__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
@@ -282,7 +282,7 @@ void __init x86_mrst_early_setup(void)
 	/* Avoid searching for BIOS MP tables */
 	x86_init.mpparse.find_smp_config = x86_init_noop;
 	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
+	set_bit(MP_BUS_ISA, mp_bus_not_pci);
 }
 
 /*
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index c2a8cab65e5d..81c5e2165c24 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,2 @@
-obj-$(CONFIG_OLPC)		+= olpc.o
+obj-$(CONFIG_OLPC)		+= olpc.o olpc_ofw.o olpc_dt.o
 obj-$(CONFIG_OLPC_XO1)		+= olpc-xo1.o
-obj-$(CONFIG_OLPC)		+= olpc_ofw.o
-obj-$(CONFIG_OF_PROMTREE)	+= olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index edaf3fe8dc5e..0060fd59ea00 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 
 #include <asm/geode.h>
 #include <asm/setup.h>
@@ -187,41 +188,47 @@ err:
 }
 EXPORT_SYMBOL_GPL(olpc_ec_cmd);
 
-static bool __init check_ofw_architecture(void)
+static bool __init check_ofw_architecture(struct device_node *root)
 {
-	size_t propsize;
-	char olpc_arch[5];
-	const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
-	void *res[] = { &propsize };
+	const char *olpc_arch;
+	int propsize;
 
-	if (olpc_ofw("getprop", args, res)) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return false;
-	}
+	olpc_arch = of_get_property(root, "architecture", &propsize);
+	/* a missing property yields NULL and leaves propsize unset */
+	if (!olpc_arch)
+		return false;
 	return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
 }
 
-static u32 __init get_board_revision(void)
+static u32 __init get_board_revision(struct device_node *root)
 {
-	size_t propsize;
-	__be32 rev;
-	const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
-	void *res[] = { &propsize };
-
-	if (olpc_ofw("getprop", args, res) || propsize != 4) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return cpu_to_be32(0);
-	}
-	return be32_to_cpu(rev);
+	int propsize;
+	const __be32 *rev;
+
+	rev = of_get_property(root, "board-revision-int", &propsize);
+	/* check rev first: propsize is only valid when the lookup succeeded */
+	if (!rev || propsize != 4)
+		return 0;
+
+	return be32_to_cpu(*rev);
 }
 
 static bool __init platform_detect(void)
 {
-	if (!check_ofw_architecture())
+	struct device_node *root = of_find_node_by_path("/");
+	bool success;
+
+	if (!root)
 		return false;
-	olpc_platform_info.flags |= OLPC_F_PRESENT;
-	olpc_platform_info.boardrev = get_board_revision();
-	return true;
+
+	success = check_ofw_architecture(root);
+	if (success) {
+		olpc_platform_info.boardrev = get_board_revision(root);
+		olpc_platform_info.flags |= OLPC_F_PRESENT;
+	}
+
+	of_node_put(root);
+	return success;
 }
 
 static int __init add_xo1_platform_devices(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 044bda5b3174..d39f63d017d2 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -19,7 +19,9 @@
 #include <linux/kernel.h>
 #include <linux/bootmem.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/of_pdt.h>
+#include <asm/olpc.h>
 #include <asm/olpc_ofw.h>
 
 static phandle __init olpc_dt_getsibling(phandle node)
@@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void)
 	pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
 			prom_early_allocated);
 }
+
+/* A list of DT node/bus matches that we want to expose as platform devices */
+static struct of_device_id __initdata of_ids[] = {
+	{ .compatible = "olpc,xo1-battery" },
+	{ .compatible = "olpc,xo1-dcon" },
+	{ .compatible = "olpc,xo1-rtc" },
+	{},
+};
+
+static int __init olpc_create_platform_devices(void)
+{
+	if (machine_is_olpc())
+		return of_platform_bus_probe(NULL, of_ids, NULL);
+	else
+		return 0;
+}
+device_initcall(olpc_create_platform_devices);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 					  struct mm_struct *mm,
 					  unsigned long va, unsigned int cpu)
 {
-	int tcpu;
-	int uvhub;
 	int locals = 0;
 	int remotes = 0;
 	int hubs = 0;
+	int tcpu;
+	int tpnode;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
 	struct bau_control *tbcp;
+	struct hub_and_pnode *hpp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
-	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(tcpu);
-		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		if (uvhub == bcp->uvhub)
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using an array stored
+		 * in local memory.
+		 */
+		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+		tpnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(tpnode, &bau_desc->distribution);
+		if (hpp->uvhub == bcp->uvhub)
 			locals++;
 		else
 			remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
  * an interrupt, but causes an error message to be returned to
  * the sender.
  */
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
 {
 	int uvhub;
 	int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
 }
 
 /*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
  */
 static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
+	/* the 14-bit pnode */
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
 			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-
 	/*
-	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
 	 * cpu even though we only use the first one; one descriptor can
 	 * describe a broadcast to 256 uv hubs.
 	 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
 		memset(bd2, 0, sizeof(struct bau_desc));
 		bd2->header.sw_ack_flag = 1;
 		/*
-		 * base_dest_nodeid is the nasid of the first uvhub
-		 * in the partition. The bit map will indicate uvhub numbers,
-		 * which are 0-N in a partition. Pnodes are unique system-wide.
+		 * The base_dest_nasid set in the message header is the nasid
+		 * of the first uvhub in the partition. The bit map will
+		 * indicate destination pnode numbers relative to that base.
+		 * They may not be consecutive if nasid striding is being used.
 		 */
-		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
-		bd2->header.dest_subnodeid = 0x10; /* the LB */
+		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
 		bd2->header.command = UV_NET_ENDPOINT_INTD;
 		bd2->header.int_both = 1;
 		/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
 /*
  * Initialization of each UV hub's structures
  */
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 {
 	int node;
 	int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 
 	node = uvhub_to_first_node(uvhub);
 	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode);
+	uv_activation_descriptor_init(node, pnode, base_pnode);
 	uv_payload_queue_init(node, pnode);
 	/*
-	 * the below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS
+	 * The below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS.
 	 */
 	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 {
 	int i;
 	int cpu;
+	int tcpu;
 	int pnode;
 	int uvhub;
 	int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bcp = &per_cpu(bau_control, cpu);
 		memset(bcp, 0, sizeof(struct bau_control));
 		pnode = uv_cpu_hub_info(cpu)->pnode;
+		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+			printk(KERN_EMERG
+				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+				cpu, pnode, base_part_pnode,
+				UV_DISTRIBUTION_SIZE);
+			return 1;
+		}
+		bcp->osnode = cpu_to_node(cpu);
+		bcp->partition_base_pnode = uv_partition_base_pnode;
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
 		bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bdp->pnode = pnode;
 		/* kludge: 'assuming' one node per socket, and assuming that
 		   disabling a socket just leaves a gap in node numbers */
-		socket = (cpu_to_node(cpu) & 1);
+		socket = bcp->osnode & 1;
 		bdp->socket_mask |= (1 << socket);
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
 nextsocket:
 			socket++;
 			socket_mask = (socket_mask >> 1);
+			/* each socket gets a local array of pnodes/hubs */
+			bcp = smaster;
+			bcp->target_hub_and_pnode = kmalloc_node(
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+			memset(bcp->target_hub_and_pnode, 0,
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus());
+			for_each_present_cpu(tcpu) {
+				bcp->target_hub_and_pnode[tcpu].pnode =
+					uv_cpu_hub_info(tcpu)->pnode;
+				bcp->target_hub_and_pnode[tcpu].uvhub =
+					uv_cpu_hub_info(tcpu)->numa_blade_id;
+			}
 		}
 	}
 	kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	if (uv_init_per_cpu(nuvhubs)) {
-		nobau = 1;
-		return 0;
-	}
-
 	uv_partition_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (uv_blade_nr_possible_cpus(uvhub) &&
 			(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
 			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+	}
+
+	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+		nobau = 1;
+		return 0;
+	}
 
 	vector = UV_BAU_MESSAGE;
 	for_each_possible_blade(uvhub)
 		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector);
+			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
 	uv_enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9daf5d1af9f1..0eb90184515f 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = {
 	.rating		= 400,
 	.read		= uv_read_rtc,
 	.mask		= (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void)
 	if (!is_uv_system())
 		return -ENODEV;
 
-	clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-				clocksource_uv.shift);
-
 	/* If single blade, prefer tsc */
 	if (uv_num_possible_blades() == 1)
 		clocksource_uv.rating = 250;
 
-	rc = clocksource_register(&clocksource_uv);
+	rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
 	if (rc)
 		printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
 	else
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1c7121ba18ff..5cc821cb2e09 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -39,6 +39,7 @@ config XEN_MAX_DOMAIN_MEMORY
 config XEN_SAVE_RESTORE
        bool
        depends on XEN
+       select HIBERNATE_CALLBACKS
        default y
 
 config XEN_DEBUG_FS
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 49dbd78ec3cb..dd7b88f2ec7a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -235,9 +235,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	*dx &= maskedx;
 }
 
-static __init void xen_init_cpuid_mask(void)
+static void __init xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
+	unsigned int xsave_mask;
 
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
@@ -249,24 +250,16 @@ static __init void xen_init_cpuid_mask(void)
 		cpuid_leaf1_edx_mask &=
 			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
 			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
-
 	ax = 1;
-	cx = 0;
 	xen_cpuid(&ax, &bx, &cx, &dx);
 
-	/* cpuid claims we support xsave; try enabling it to see what happens */
-	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
-		unsigned long cr4;
-
-		set_in_cr4(X86_CR4_OSXSAVE);
-		
-		cr4 = read_cr4();
+	xsave_mask =
+		(1 << (X86_FEATURE_XSAVE % 32)) |
+		(1 << (X86_FEATURE_OSXSAVE % 32));
 
-		if ((cr4 & X86_CR4_OSXSAVE) == 0)
-			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
-
-		clear_in_cr4(X86_CR4_OSXSAVE);
-	}
+	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
+	if ((cx & xsave_mask) != xsave_mask)
+		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)
@@ -407,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 /*
  * load_gdt for early boot, when the gdt is only mapped once
  */
-static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 {
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
@@ -669,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
  * Version of write_gdt_entry for use at early boot-time needed to
  * update an entry as simply as possible.
  */
-static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 					    const void *desc, int type)
 {
 	switch (type) {
@@ -940,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
-static const struct pv_info xen_info __initdata = {
+static const struct pv_info xen_info __initconst = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
 
 	.name = "Xen",
 };
 
-static const struct pv_init_ops xen_init_ops __initdata = {
+static const struct pv_init_ops xen_init_ops __initconst = {
 	.patch = xen_patch,
 };
 
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.cpuid = xen_cpuid,
 
 	.set_debugreg = xen_set_debugreg,
@@ -1011,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.end_context_switch = xen_end_context_switch,
 };
 
-static const struct pv_apic_ops xen_apic_ops __initdata = {
+static const struct pv_apic_ops xen_apic_ops __initconst = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
 #endif
@@ -1062,7 +1055,7 @@ int xen_panic_handler_init(void)
 	return 0;
 }
 
-static const struct machine_ops __initdata xen_machine_ops = {
+static const struct machine_ops xen_machine_ops __initconst = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
 	.power_off = xen_machine_halt,
@@ -1339,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
 	.notifier_call	= xen_hvm_cpu_notify,
 };
 
@@ -1388,7 +1381,7 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
 	.name			= "Xen HVM",
 	.detect			= xen_hvm_platform,
 	.init_platform		= xen_hvm_guest_init,
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 6a6fe8939645..8bbb465b6f0a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -113,7 +113,7 @@ static void xen_halt(void)
 		xen_safe_halt();
 }
 
-static const struct pv_irq_ops xen_irq_ops __initdata = {
+static const struct pv_irq_ops xen_irq_ops __initconst = {
 	.save_fl = PV_CALLEE_SAVE(xen_save_fl),
 	.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
 	.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c82df6c9c0f0..02d752460371 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -565,13 +565,13 @@ pte_t xen_make_pte_debug(pteval_t pte)
 	if (io_page &&
 	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
 		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-		WARN(addr != other_addr,
+		WARN_ONCE(addr != other_addr,
 			"0x%lx is using VM_IO, but it is 0x%lx!\n",
 			(unsigned long)addr, (unsigned long)other_addr);
 	} else {
 		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
 		other_addr = (_pte.pte & PTE_PFN_MASK);
-		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+		WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
 			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
 			(unsigned long)addr);
 	}
@@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 				  enum pt_level level)
 {
 	SetPagePinned(page);
@@ -1187,7 +1187,7 @@ static void drop_other_mm_ref(void *info)
 
 	active_mm = percpu_read(cpu_tlbstate.active_mm);
 
-	if (active_mm == mm)
+	if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure
@@ -1271,13 +1271,27 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static __init void xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set the rest as RW */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
 static void xen_post_allocator_init(void);
 
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static void __init xen_pagetable_setup_done(pgd_t *base)
 {
 	xen_setup_shared_info();
 	xen_post_allocator_init();
@@ -1473,16 +1487,20 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	unsigned long pfn = pte_pfn(pte);
-
 #ifdef CONFIG_X86_32
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+{
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
 		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
 			       pte_val_ma(pte));
-#endif
+
+	return pte;
+}
+#else /* CONFIG_X86_64 */
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
 
 	/*
 	 * If the new pfn is within the range of the newly allocated
@@ -1491,16 +1509,17 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	 * it is RO.
 	 */
 	if (((!is_early_ioremap_ptep(ptep) &&
-			pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+			pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
 			(is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
 	return pte;
 }
+#endif /* CONFIG_X86_64 */
 
 /* Init-time set_pte while constructing initial pagetables, which
    doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
 	pte = mask_rw_pte(ptep, pte);
 
@@ -1518,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1528,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 }
 
 /* Used for pmd and pud */
-static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1538,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static __init void xen_release_pte_init(unsigned long pfn)
+static void __init xen_release_pte_init(unsigned long pfn)
 {
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static __init void xen_release_pmd_init(unsigned long pfn)
+static void __init xen_release_pmd_init(unsigned long pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -1670,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 		BUG();
 }
 
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
 	unsigned ident_pte;
@@ -1753,7 +1772,7 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pud_t *l3;
@@ -1824,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
 
-static __init void xen_write_cr3_init(unsigned long cr3)
+static void __init xen_write_cr3_init(unsigned long cr3)
 {
 	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
 
@@ -1861,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
@@ -1967,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 }
 
-__init void xen_ident_map_ISA(void)
+void __init xen_ident_map_ISA(void)
 {
 	unsigned long pa;
 
@@ -1990,7 +2009,7 @@ __init void xen_ident_map_ISA(void)
 	xen_flush_tlb();
 }
 
-static __init void xen_post_allocator_init(void)
+static void __init xen_post_allocator_init(void)
 {
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
@@ -2027,7 +2046,7 @@ static void xen_leave_lazy_mmu(void)
 	preempt_enable();
 }
 
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.read_cr2 = xen_read_cr2,
 	.write_cr2 = xen_write_cr2,
 
@@ -2100,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 void __init xen_init_mmu_ops(void)
 {
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0de8b06..58efeb9d5440 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
 	/* Boundary cross-over for the edges: */
 	if (idx) {
 		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
 
 		p2m_init(p2m);
 
 		p2m_top[topidx][mididx] = p2m;
 
+		/* For save/restore we need the MFN of the P2M saved */
+		
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+			"P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+			topidx, mididx);
+		mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
 	}
 	return idx != 0;
 }
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
 	{
 		unsigned topidx = p2m_top_index(pfn);
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
+		unsigned long **mid;
+
+		mid = p2m_top[topidx];
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		if (mid == p2m_mid_missing) {
+			mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
 
 			p2m_mid_init(mid);
 
 			p2m_top[topidx] = mid;
+
+			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+		}
+		/* And the save/restore P2M tables.. */
+		if (mid_mfn_p == p2m_mid_missing_mfn) {
+			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_mfn_init(mid_mfn_p);
+
+			p2m_top_mfn_p[topidx] = mid_mfn_p;
+			p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+			/* Note: we don't set mid_mfn_p[mididx] here,
+			 * look in __early_alloc_p2m */
 		}
 	}
 
@@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (!PageHighMem(page)) {
 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
 		ptep = lookup_address(address, &level);
-
 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
@@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 		return -ENOMEM;
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		/* Just zap old mapping for now */
 		pte_clear(&init_mm, address, ptep);
-
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 
 	return 0;
 }
-
-int m2p_remove_override(struct page *page)
+EXPORT_SYMBOL_GPL(m2p_add_override);
+int m2p_remove_override(struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page)
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 	set_phys_to_machine(pfn, page->index);
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		set_pte_at(&init_mm, address, ptep,
 				pfn_pte(pfn, PAGE_KERNEL));
 		/* No tlb flush necessary because the caller already
@@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(m2p_remove_override);
 
 struct page *m2p_find_override(unsigned long mfn)
 {
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fa0269a99377..be1a464f6d66 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static __init void xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(unsigned long pages)
 {
 	unsigned long pfn;
 
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
 		if (last > end)
 			continue;
 
-		if (entry->type == E820_RAM) {
+		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
 			if (start > start_pci)
 				identity += set_phys_range_identity(
 						PFN_UP(start_pci), PFN_DOWN(start));
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
+#ifdef CONFIG_X86_32
 	xen_extra_mem_start = mem_end;
+#else
+	xen_extra_mem_start = max((1ULL << 32), mem_end);
+#endif
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 
@@ -336,7 +340,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static __cpuinit int register_callback(unsigned type, const void *func)
+static int __cpuinit register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
 		.type = type,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed99..41038c01de40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
 
-static __cpuinit void cpu_bringup(void)
+static void __cpuinit cpu_bringup(void)
 {
 	int cpu = smp_processor_id();
 
@@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-static __cpuinit void cpu_bringup_and_idle(void)
+static void __cpuinit cpu_bringup_and_idle(void)
 {
 	cpu_bringup();
 	cpu_idle();
@@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 }
 
-static __cpuinit int
+static int __cpuinit
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
 	struct vcpu_guest_context *ctxt;
@@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static const struct smp_ops xen_smp_ops __initdata = {
+static const struct smp_ops xen_smp_ops __initconst = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
 	.smp_cpus_done = xen_smp_cpus_done,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 2e2d370a47b1..5158c505bef9 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -26,8 +26,6 @@
 
 #include "xen-ops.h"
 
-#define XEN_SHIFT 22
-
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP	100000
 #define NS_PER_TICK	(1000000000LL / HZ)
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = {
 	.rating = 400,
 	.read = xen_clocksource_get_cycles,
 	.mask = ~0,
-	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
-	.shift = XEN_SHIFT,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -439,16 +435,16 @@ void xen_timer_resume(void)
 	}
 }
 
-static const struct pv_time_ops xen_time_ops __initdata = {
+static const struct pv_time_ops xen_time_ops __initconst = {
 	.sched_clock = xen_clocksource_read,
 };
 
-static __init void xen_time_init(void)
+static void __init xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 	struct timespec tp;
 
-	clocksource_register(&xen_clocksource);
+	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
 		/* Successfully turned off 100Hz tick, so we have the
@@ -468,7 +464,7 @@ static __init void xen_time_init(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_init_time_ops(void)
+void __init xen_init_time_ops(void)
 {
 	pv_time_ops = xen_time_ops;
 
@@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_hvm_init_time_ops(void)
+void __init xen_hvm_init_time_ops(void)
 {
 	/* vector callback is needed otherwise we cannot receive interrupts
 	 * on cpu > 0 and at this point we don't know how many cpus are
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3112f55638c4..97dfdc8757b3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {}
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init xen_init_spinlocks(void);
-__cpuinit void xen_init_lock_cpu(int cpu);
+void __cpuinit xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 #else
 static inline void xen_init_spinlocks(void)
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index d77089df412e..4340ee076bd5 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -64,47 +64,41 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int j;
-
-	seq_printf(p, "%*s: ", prec, "NMI");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", nmi_count(j));
-	seq_putc(p, '\n');
 	seq_printf(p, "%*s: ", prec, "ERR");
 	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
 	return 0;
 }
 
-static void xtensa_irq_mask(struct irq_chip *d)
+static void xtensa_irq_mask(struct irq_data *d)
 {
 	cached_irq_mask &= ~(1 << d->irq);
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_unmask(struct irq_chip *d)
+static void xtensa_irq_unmask(struct irq_data *d)
 {
 	cached_irq_mask |= 1 << d->irq;
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_enable(struct irq_chip *d)
+static void xtensa_irq_enable(struct irq_data *d)
 {
 	variant_irq_enable(d->irq);
 	xtensa_irq_unmask(d->irq);
 }
 
-static void xtensa_irq_disable(struct irq_chip *d)
+static void xtensa_irq_disable(struct irq_data *d)
 {
 	xtensa_irq_mask(d->irq);
 	variant_irq_disable(d->irq);
 }
 
-static void xtensa_irq_ack(struct irq_chip *d)
+static void xtensa_irq_ack(struct irq_data *d)
 {
 	set_sr(1 << d->irq, INTCLEAR);
 }
 
-static int xtensa_irq_retrigger(struct irq_chip *d)
+static int xtensa_irq_retrigger(struct irq_data *d)
 {
 	set_sr (1 << d->irq, INTSET);
 	return 1;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f0605ab2a761..471fdcc5df85 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -114,6 +114,13 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 }
 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
 
+struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
+{
+	return container_of(task_subsys_state(tsk, blkio_subsys_id),
+			    struct blkio_cgroup, css);
+}
+EXPORT_SYMBOL_GPL(task_blkio_cgroup);
+
 static inline void
 blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
 {
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 10919fae2d3a..c774930cc206 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -291,6 +291,7 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
+extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 	struct blkio_group *blkg, void *key, dev_t dev,
 	enum blkio_policy_id plid);
@@ -314,6 +315,8 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 struct cgroup;
 static inline struct blkio_cgroup *
 cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
+static inline struct blkio_cgroup *
+task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
 
 static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, void *key, dev_t dev,
diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc30799..3fe00a14822a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -198,26 +198,13 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 }
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
-/*
- * Make sure that plugs that were pending when this function was entered,
- * are now complete and requests pushed to the queue.
-*/
-static inline void queue_sync_plugs(struct request_queue *q)
-{
-	/*
-	 * If the current process is plugged and has barriers submitted,
-	 * we will livelock if we don't unplug first.
-	 */
-	blk_flush_plug(current);
-}
-
 static void blk_delay_work(struct work_struct *work)
 {
 	struct request_queue *q;
 
 	q = container_of(work, struct request_queue, delay_work.work);
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irq(q->queue_lock);
 }
 
@@ -233,7 +220,8 @@ static void blk_delay_work(struct work_struct *work)
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-	schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+	queue_delayed_work(kblockd_workqueue, &q->delay_work,
+				msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -251,7 +239,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -298,38 +286,44 @@ void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->timeout);
 	cancel_delayed_work_sync(&q->delay_work);
-	queue_sync_plugs(q);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
- * @force_kblockd: Don't run @q->request_fn directly.  Use kblockd.
  *
  * Description:
  *    See @blk_run_queue. This variant must be called with the queue lock
  *    held and interrupts disabled.
- *
  */
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
 {
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	/*
-	 * Only recurse once to avoid overrunning the stack, let the unplug
-	 * handling reinvoke the handler shortly if we already got there.
-	 */
-	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else
-		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
 /**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
+ *    of us.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+	if (likely(!blk_queue_stopped(q))) {
+		__cancel_delayed_work(&q->delay_work);
+		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	}
+}
+EXPORT_SYMBOL(blk_run_queue_async);
+
+/**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
  *
@@ -342,7 +336,7 @@ void blk_run_queue(struct request_queue *q)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -991,7 +985,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 		blk_queue_end_tag(q, rq);
 
 	add_acct_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
@@ -1311,7 +1305,15 @@ get_rq:
 
 	plug = current->plug;
 	if (plug) {
-		if (!plug->should_sort && !list_empty(&plug->list)) {
+		/*
+		 * If this is the first request added after a plug, fire
+		 * off a plug trace. If others have been added before, check
+		 * if we have multiple devices in this plug. If so, make a
+		 * note to sort the list before dispatch.
+		 */
+		if (list_empty(&plug->list))
+			trace_block_plug(q);
+		else if (!plug->should_sort) {
 			struct request *__rq;
 
 			__rq = list_entry_rq(plug->list.prev);
@@ -1327,7 +1329,7 @@ get_rq:
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
@@ -2644,6 +2646,7 @@ void blk_start_plug(struct blk_plug *plug)
 
 	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
+	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
 
 	/*
@@ -2668,33 +2671,93 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return !(rqa->q <= rqb->q);
 }
 
-static void flush_plug_list(struct blk_plug *plug)
+/*
+ * If 'from_schedule' is true, then postpone the dispatch of requests
+ * until a safe kblockd context. We do this to avoid accidental big
+ * additional stack usage in driver dispatch, in places where the original
+ * plugger did not intend it.
+ */
+static void queue_unplugged(struct request_queue *q, unsigned int depth,
+			    bool from_schedule)
+	__releases(q->queue_lock)
+{
+	trace_block_unplug(q, depth, !from_schedule);
+
+	/*
+	 * If we are punting this to kblockd, then we can safely drop
+	 * the queue_lock before waking kblockd (which needs to take
+	 * this lock).
+	 */
+	if (from_schedule) {
+		spin_unlock(q->queue_lock);
+		blk_run_queue_async(q);
+	} else {
+		__blk_run_queue(q);
+		spin_unlock(q->queue_lock);
+	}
+
+}
+
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+	LIST_HEAD(callbacks);
+
+	if (list_empty(&plug->cb_list))
+		return;
+
+	list_splice_init(&plug->cb_list, &callbacks);
+
+	while (!list_empty(&callbacks)) {
+		struct blk_plug_cb *cb = list_first_entry(&callbacks,
+							  struct blk_plug_cb,
+							  list);
+		list_del(&cb->list);
+		cb->callback(cb);
+	}
+}
+
+void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
 	unsigned long flags;
 	struct request *rq;
+	LIST_HEAD(list);
+	unsigned int depth;
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
+	flush_plug_callbacks(plug);
 	if (list_empty(&plug->list))
 		return;
 
-	if (plug->should_sort)
-		list_sort(NULL, &plug->list, plug_rq_cmp);
+	list_splice_init(&plug->list, &list);
+
+	if (plug->should_sort) {
+		list_sort(NULL, &list, plug_rq_cmp);
+		plug->should_sort = 0;
+	}
 
 	q = NULL;
+	depth = 0;
+
+	/*
+	 * Save and disable interrupts here, to avoid doing it for every
+	 * queue lock we have to take.
+	 */
 	local_irq_save(flags);
-	while (!list_empty(&plug->list)) {
-		rq = list_entry_rq(plug->list.next);
+	while (!list_empty(&list)) {
+		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
 		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
-			if (q) {
-				__blk_run_queue(q, false);
-				spin_unlock(q->queue_lock);
-			}
+			/*
+			 * This drops the queue lock
+			 */
+			if (q)
+				queue_unplugged(q, depth, from_schedule);
 			q = rq->q;
+			depth = 0;
 			spin_lock(q->queue_lock);
 		}
 		rq->cmd_flags &= ~REQ_ON_PLUG;
@@ -2706,38 +2769,27 @@ static void flush_plug_list(struct blk_plug *plug)
 			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
 		else
 			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-	}
 
-	if (q) {
-		__blk_run_queue(q, false);
-		spin_unlock(q->queue_lock);
+		depth++;
 	}
 
-	BUG_ON(!list_empty(&plug->list));
-	local_irq_restore(flags);
-}
-
-static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	flush_plug_list(plug);
+	/*
+	 * This drops the queue lock
+	 */
+	if (q)
+		queue_unplugged(q, depth, from_schedule);
 
-	if (plug == tsk->plug)
-		tsk->plug = NULL;
+	local_irq_restore(flags);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
 {
-	if (plug)
-		__blk_finish_plug(current, plug);
-}
-EXPORT_SYMBOL(blk_finish_plug);
+	blk_flush_plug_list(plug, false);
 
-void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	__blk_finish_plug(tsk, plug);
-	tsk->plug = plug;
+	if (plug == current->plug)
+		current->plug = NULL;
 }
-EXPORT_SYMBOL(__blk_flush_plug);
+EXPORT_SYMBOL(blk_finish_plug);
 
 int __init blk_dev_init(void)
 {
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 7482b7fa863b..81e31819a597 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	/* the queue is stopped so it won't be plugged+unplugged */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index eba4a2790c6c..6c9b5e189e62 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * request_fn may confuse the driver.  Always use kblockd.
 	 */
 	if (queued)
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
@@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error)
 	 * the comment in flush_end_io().
 	 */
 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 261c75c665ae..bd236313f35d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -66,14 +66,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 
 	if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
 		blk_set_queue_full(q, BLK_RW_SYNC);
-	} else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
+	} else {
 		blk_clear_queue_full(q, BLK_RW_SYNC);
 		wake_up(&rl->wait[BLK_RW_SYNC]);
 	}
 
 	if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
 		blk_set_queue_full(q, BLK_RW_ASYNC);
-	} else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
+	} else {
 		blk_clear_queue_full(q, BLK_RW_ASYNC);
 		wake_up(&rl->wait[BLK_RW_ASYNC]);
 	}
@@ -498,7 +498,6 @@ int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
 	struct device *dev = disk_to_dev(disk);
-
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
@@ -509,8 +508,10 @@ int blk_register_queue(struct gendisk *disk)
 		return ret;
 
 	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
-	if (ret < 0)
+	if (ret < 0) {
+		blk_trace_remove_sysfs(dev);
 		return ret;
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
@@ -521,7 +522,7 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret) {
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
 		kobject_del(&q->kobj);
-		blk_trace_remove_sysfs(disk_to_dev(disk));
+		blk_trace_remove_sysfs(dev);
 		kobject_put(&dev->kobj);
 		return ret;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0475a22a420d..252a81a306f7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -160,9 +160,8 @@ static void throtl_put_tg(struct throtl_grp *tg)
 }
 
 static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-			struct cgroup *cgroup)
+			struct blkio_cgroup *blkcg)
 {
-	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct throtl_grp *tg = NULL;
 	void *key = td;
 	struct backing_dev_info *bdi = &td->queue->backing_dev_info;
@@ -229,12 +228,12 @@ done:
 
 static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 {
-	struct cgroup *cgroup;
 	struct throtl_grp *tg = NULL;
+	struct blkio_cgroup *blkcg;
 
 	rcu_read_lock();
-	cgroup = task_cgroup(current, blkio_subsys_id);
-	tg = throtl_find_alloc_tg(td, cgroup);
+	blkcg = task_blkio_cgroup(current);
+	tg = throtl_find_alloc_tg(td, blkcg);
 	if (!tg)
 		tg = &td->root_tg;
 	rcu_read_unlock();
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3be881ec95ad..ab7a9e6a9b1c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1014,10 +1014,9 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group *
-cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
+		struct blkio_cgroup *blkcg, int create)
 {
-	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct cfq_group *cfqg = NULL;
 	void *key = cfqd;
 	int i, j;
@@ -1079,12 +1078,12 @@ done:
  */
 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
-	struct cgroup *cgroup;
+	struct blkio_cgroup *blkcg;
 	struct cfq_group *cfqg = NULL;
 
 	rcu_read_lock();
-	cgroup = task_cgroup(current, blkio_subsys_id);
-	cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
+	blkcg = task_blkio_cgroup(current);
+	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
 	if (!cfqg && create)
 		cfqg = &cfqd->root_group;
 	rcu_read_unlock();
@@ -2582,28 +2581,20 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 }
 
 /*
- * Must always be called with the rcu_read_lock() held
+ * Call func for each cic attached to this ioc.
  */
 static void
-__call_for_each_cic(struct io_context *ioc,
-		    void (*func)(struct io_context *, struct cfq_io_context *))
+call_for_each_cic(struct io_context *ioc,
+		  void (*func)(struct io_context *, struct cfq_io_context *))
 {
 	struct cfq_io_context *cic;
 	struct hlist_node *n;
 
+	rcu_read_lock();
+
 	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
 		func(ioc, cic);
-}
 
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
-		  void (*func)(struct io_context *, struct cfq_io_context *))
-{
-	rcu_read_lock();
-	__call_for_each_cic(ioc, func);
 	rcu_read_unlock();
 }
 
@@ -2664,7 +2655,7 @@ static void cfq_free_io_context(struct io_context *ioc)
 	 * should be ok to iterate over the known list, we will see all cic's
 	 * since no new ones are added.
 	 */
-	__call_for_each_cic(ioc, cic_free_func);
+	call_for_each_cic(ioc, cic_free_func);
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -3368,7 +3359,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			    cfqd->busy_queues > 1) {
 				cfq_del_timer(cfqd, cfqq);
 				cfq_clear_cfqq_wait_request(cfqq);
-				__blk_run_queue(cfqd->queue, false);
+				__blk_run_queue(cfqd->queue);
 			} else {
 				cfq_blkiocg_update_idle_time_stats(
 						&cfqq->cfqg->blkg);
@@ -3383,7 +3374,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		__blk_run_queue(cfqd->queue, false);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -3743,7 +3734,7 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(cfqd->queue, false);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index 0cdb4e7ebab4..45ca1e34f582 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q)
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -671,7 +671,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 			q->boundary_rq = rq;
 		}
 	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
-		    where == ELEVATOR_INSERT_SORT)
+		    (where == ELEVATOR_INSERT_SORT ||
+		     where == ELEVATOR_INSERT_SORT_MERGE))
 		where = ELEVATOR_INSERT_BACK;
 
 	switch (where) {
@@ -695,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT_MERGE:
diff --git a/block/genhd.c b/block/genhd.c
index b364bd038a18..2dd988723d73 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1588,9 +1588,13 @@ static void disk_events_workfn(struct work_struct *work)
 
 	spin_unlock_irq(&ev->lock);
 
-	/* tell userland about new events */
+	/*
+	 * Tell userland about new events.  Only the events listed in
+	 * @disk->events are reported.  Unlisted events are processed the
+	 * same internally but never get reported to userland.
+	 */
 	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
-		if (events & (1 << i))
+		if (events & disk->events & (1 << i))
 			envp[nr_events++] = disk_uevents[i];
 
 	if (nr_events)
diff --git a/drivers/Kconfig b/drivers/Kconfig
index aca706751469..61631edfecc2 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -121,4 +121,7 @@ source "drivers/platform/Kconfig"
 source "drivers/clk/Kconfig"
 
 source "drivers/hwspinlock/Kconfig"
+
+source "drivers/clocksource/Kconfig"
+
 endmenu
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 66a03caa2ad9..f739a70b1c70 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -1,5 +1,6 @@
 config ACPI_APEI
 	bool "ACPI Platform Error Interface (APEI)"
+	select MISC_FILESYSTEMS
 	select PSTORE
 	depends on X86
 	help
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index d6cb0ff6988e..e6cef8e1b534 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -929,13 +929,17 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 	return 0;
 }
 
-static size_t erst_reader(u64 *id, enum pstore_type_id *type,
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
 		       struct timespec *time);
 static u64 erst_writer(enum pstore_type_id type, size_t size);
 
 static struct pstore_info erst_info = {
 	.owner		= THIS_MODULE,
 	.name		= "erst",
+	.open		= erst_open_pstore,
+	.close		= erst_close_pstore,
 	.read		= erst_reader,
 	.write		= erst_writer,
 	.erase		= erst_clear
@@ -957,12 +961,32 @@ struct cper_pstore_record {
 	char data[];
 } __packed;
 
-static size_t erst_reader(u64 *id, enum pstore_type_id *type,
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+	int rc;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	rc = erst_get_record_id_begin(&reader_pos);
+
+	return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+	erst_get_record_id_end();
+
+	return 0;
+}
+
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
 		       struct timespec *time)
 {
 	int rc;
-	ssize_t len;
-	unsigned long flags;
+	ssize_t len = 0;
 	u64 record_id;
 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
 					(erst_info.buf - sizeof(*rcd));
@@ -970,24 +994,28 @@ static size_t erst_reader(u64 *id, enum pstore_type_id *type,
 	if (erst_disable)
 		return -ENODEV;
 
-	raw_spin_lock_irqsave(&erst_lock, flags);
 skip:
-	rc = __erst_get_next_record_id(&record_id);
-	if (rc) {
-		raw_spin_unlock_irqrestore(&erst_lock, flags);
-		return rc;
-	}
+	rc = erst_get_record_id_next(&reader_pos, &record_id);
+	if (rc)
+		goto out;
+
 	/* no more record */
 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
-		raw_spin_unlock_irqrestore(&erst_lock, flags);
-		return 0;
+		rc = -1;
+		goto out;
 	}
 
-	len = __erst_read(record_id, &rcd->hdr, sizeof(*rcd) +
-			  erst_erange.size);
+	len = erst_read(record_id, &rcd->hdr, sizeof(*rcd) +
+			erst_info.bufsize);
+	/* The record may be cleared by others, try to read the next record */
+	if (len == -ENOENT)
+		goto skip;
+	else if (len < 0) {
+		rc = -1;
+		goto out;
+	}
 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
 		goto skip;
-	raw_spin_unlock_irqrestore(&erst_lock, flags);
 
 	*id = record_id;
 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
@@ -1005,7 +1033,8 @@ skip:
 		time->tv_sec = 0;
 	time->tv_nsec = 0;
 
-	return len - sizeof(*rcd);
+out:
+	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
 static u64 erst_writer(enum pstore_type_id type, size_t size)
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 3a73a93596e8..85b32376dad7 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -49,10 +49,6 @@ ACPI_MODULE_NAME("processor_perflib");
 
 static DEFINE_MUTEX(performance_mutex);
 
-/* Use cpufreq debug layer for _PPC changes. */
-#define cpufreq_printk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
-						"cpufreq-core", msg)
-
 /*
  * _PPC support is implemented as a CPUfreq policy notifier:
  * This means each time a CPUfreq driver registered also with
@@ -145,7 +141,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
 		return -ENODEV;
 	}
 
-	cpufreq_printk("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
+	pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
 		       (int)ppc, ppc ? "" : "not");
 
 	pr->performance_platform_limit = (int)ppc;
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index ad3501739563..605a2954ef17 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -710,20 +710,14 @@ static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr)
 }
 
 #ifdef CONFIG_X86
-static int acpi_throttling_rdmsr(struct acpi_processor *pr,
-					u64 *value)
+static int acpi_throttling_rdmsr(u64 *value)
 {
-	struct cpuinfo_x86 *c;
 	u64 msr_high, msr_low;
-	unsigned int cpu;
 	u64 msr = 0;
 	int ret = -1;
 
-	cpu = pr->id;
-	c = &cpu_data(cpu);
-
-	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
-		!cpu_has(c, X86_FEATURE_ACPI)) {
+	if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
+		!this_cpu_has(X86_FEATURE_ACPI)) {
 		printk(KERN_ERR PREFIX
 			"HARDWARE addr space,NOT supported yet\n");
 	} else {
@@ -738,18 +732,13 @@ static int acpi_throttling_rdmsr(struct acpi_processor *pr,
 	return ret;
 }
 
-static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
+static int acpi_throttling_wrmsr(u64 value)
 {
-	struct cpuinfo_x86 *c;
-	unsigned int cpu;
 	int ret = -1;
 	u64 msr;
 
-	cpu = pr->id;
-	c = &cpu_data(cpu);
-
-	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
-		!cpu_has(c, X86_FEATURE_ACPI)) {
+	if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
+		!this_cpu_has(X86_FEATURE_ACPI)) {
 		printk(KERN_ERR PREFIX
 			"HARDWARE addr space,NOT supported yet\n");
 	} else {
@@ -761,15 +750,14 @@ static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
 	return ret;
 }
 #else
-static int acpi_throttling_rdmsr(struct acpi_processor *pr,
-				u64 *value)
+static int acpi_throttling_rdmsr(u64 *value)
 {
 	printk(KERN_ERR PREFIX
 		"HARDWARE addr space,NOT supported yet\n");
 	return -1;
 }
 
-static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
+static int acpi_throttling_wrmsr(u64 value)
 {
 	printk(KERN_ERR PREFIX
 		"HARDWARE addr space,NOT supported yet\n");
@@ -801,7 +789,7 @@ static int acpi_read_throttling_status(struct acpi_processor *pr,
 		ret = 0;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		ret = acpi_throttling_rdmsr(pr, value);
+		ret = acpi_throttling_rdmsr(value);
 		break;
 	default:
 		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
@@ -834,7 +822,7 @@ static int acpi_write_throttling_state(struct acpi_processor *pr,
 		ret = 0;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		ret = acpi_throttling_wrmsr(pr, value);
+		ret = acpi_throttling_wrmsr(value);
 		break;
 	default:
 		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index b136c9c1e531..449c556274c0 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -943,6 +943,10 @@ static int acpi_bus_get_flags(struct acpi_device *device)
 	if (ACPI_SUCCESS(status))
 		device->flags.lockable = 1;
 
+	/* Power resources cannot be power manageable. */
+	if (device->device_type == ACPI_BUS_TYPE_POWER)
+		return 0;
+
 	/* Presence of _PS0|_PR0 indicates 'power manageable' */
 	status = acpi_get_handle(device->handle, "_PS0", &temp);
 	if (ACPI_FAILURE(status))
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 821040503154..7025593a58c8 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -214,7 +214,7 @@ static int amba_pm_resume_noirq(struct device *dev)
 
 #endif /* !CONFIG_SUSPEND */
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 
 static int amba_pm_freeze(struct device *dev)
 {
@@ -352,7 +352,7 @@ static int amba_pm_restore_noirq(struct device *dev)
 	return ret;
 }
 
-#else /* !CONFIG_HIBERNATION */
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #define amba_pm_freeze		NULL
 #define amba_pm_thaw		NULL
@@ -363,7 +363,7 @@ static int amba_pm_restore_noirq(struct device *dev)
 #define amba_pm_poweroff_noirq	NULL
 #define amba_pm_restore_noirq	NULL
 
-#endif /* !CONFIG_HIBERNATION */
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #ifdef CONFIG_PM
 
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 39d829cd82dd..71afe0371311 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -150,7 +150,7 @@ static const struct ata_port_info ahci_port_info[] = {
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP |
 				 AHCI_HFLAG_YES_NCQ),
-		.flags		= AHCI_FLAG_COMMON,
+		.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM,
 		.pio_mask	= ATA_PIO4,
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
@@ -261,6 +261,12 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
 	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */
 	{ PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
+	{ PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
+	{ PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */
+	{ PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */
+	{ PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */
+	{ PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */
+	{ PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 39865009c251..12c5282e7fca 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -229,6 +229,10 @@ enum {
 	EM_CTL_ALHD		= (1 << 26), /* Activity LED */
 	EM_CTL_XMT		= (1 << 25), /* Transmit Only */
 	EM_CTL_SMB		= (1 << 24), /* Single Message Buffer */
+	EM_CTL_SGPIO		= (1 << 19), /* SGPIO messages supported */
+	EM_CTL_SES		= (1 << 18), /* SES-2 messages supported */
+	EM_CTL_SAFTE		= (1 << 17), /* SAF-TE messages supported */
+	EM_CTL_LED		= (1 << 16), /* LED messages supported */
 
 	/* em message type */
 	EM_MSG_TYPE_LED		= (1 << 0), /* LED */
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 0bc3fd6c3fdb..6f6e7718b05c 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -309,6 +309,14 @@ static const struct pci_device_id piix_pci_tbl[] = {
 	{ 0x8086, 0x1d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
 	/* SATA Controller IDE (PBG) */
 	{ 0x8086, 0x1d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	/* SATA Controller IDE (Panther Point) */
+	{ 0x8086, 0x1e00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
+	/* SATA Controller IDE (Panther Point) */
+	{ 0x8086, 0x1e01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
+	/* SATA Controller IDE (Panther Point) */
+	{ 0x8086, 0x1e08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	/* SATA Controller IDE (Panther Point) */
+	{ 0x8086, 0x1e09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
 	{ }	/* terminate list */
 };
 
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 26d452339e98..d38c40fe4ddb 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -109,6 +109,8 @@ static ssize_t ahci_read_em_buffer(struct device *dev,
 static ssize_t ahci_store_em_buffer(struct device *dev,
 				    struct device_attribute *attr,
 				    const char *buf, size_t size);
+static ssize_t ahci_show_em_supported(struct device *dev,
+				      struct device_attribute *attr, char *buf);
 
 static DEVICE_ATTR(ahci_host_caps, S_IRUGO, ahci_show_host_caps, NULL);
 static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL);
@@ -116,6 +118,7 @@ static DEVICE_ATTR(ahci_host_version, S_IRUGO, ahci_show_host_version, NULL);
 static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL);
 static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO,
 		   ahci_read_em_buffer, ahci_store_em_buffer);
+static DEVICE_ATTR(em_message_supported, S_IRUGO, ahci_show_em_supported, NULL);
 
 struct device_attribute *ahci_shost_attrs[] = {
 	&dev_attr_link_power_management_policy,
@@ -126,6 +129,7 @@ struct device_attribute *ahci_shost_attrs[] = {
 	&dev_attr_ahci_host_version,
 	&dev_attr_ahci_port_cmd,
 	&dev_attr_em_buffer,
+	&dev_attr_em_message_supported,
 	NULL
 };
 EXPORT_SYMBOL_GPL(ahci_shost_attrs);
@@ -343,6 +347,24 @@ static ssize_t ahci_store_em_buffer(struct device *dev,
 	return size;
 }
 
+static ssize_t ahci_show_em_supported(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ata_port *ap = ata_shost_to_port(shost);
+	struct ahci_host_priv *hpriv = ap->host->private_data;
+	void __iomem *mmio = hpriv->mmio;
+	u32 em_ctl;
+
+	em_ctl = readl(mmio + HOST_EM_CTL);
+	/* One space-terminated keyword per supported message type bit. */
+	return sprintf(buf, "%s%s%s%s\n",
+		       em_ctl & EM_CTL_LED ? "led " : "",
+		       em_ctl & EM_CTL_SAFTE ? "saf-te " : "",
+		       em_ctl & EM_CTL_SES ? "ses-2 " : "",
+		       em_ctl & EM_CTL_SGPIO ? "sgpio " : "");
+}
+
 /**
  *	ahci_save_initial_config - Save and fixup initial config values
  *	@dev: target AHCI device
@@ -1897,7 +1919,17 @@ static void ahci_pmp_attach(struct ata_port *ap)
 	ahci_enable_fbs(ap);
 
 	pp->intr_mask |= PORT_IRQ_BAD_PMP;
-	writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
+
+	/*
+	 * We must not change the port interrupt mask register if the
+	 * port is marked frozen, the value in pp->intr_mask will be
+	 * restored later when the port is thawed.
+	 *
+	 * Note that during initialization, the port is marked as
+	 * frozen since the irq handler is not yet registered.
+	 */
+	if (!(ap->pflags & ATA_PFLAG_FROZEN))
+		writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
 }
 
 static void ahci_pmp_detach(struct ata_port *ap)
@@ -1913,7 +1945,10 @@ static void ahci_pmp_detach(struct ata_port *ap)
 	writel(cmd, port_mmio + PORT_CMD);
 
 	pp->intr_mask &= ~PORT_IRQ_BAD_PMP;
-	writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
+
+	/* see comment above in ahci_pmp_attach() */
+	if (!(ap->pflags & ATA_PFLAG_FROZEN))
+		writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
 }
 
 int ahci_port_resume(struct ata_port *ap)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 423c0a6952b2..76c3c15cb1e6 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4139,6 +4139,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	 */
 	{ "PIONEER DVD-RW  DVRTD08",	"1.00",	ATA_HORKAGE_NOSETXFER },
 	{ "PIONEER DVD-RW  DVR-212D",	"1.28", ATA_HORKAGE_NOSETXFER },
+	{ "PIONEER DVD-RW  DVR-216D",	"1.08", ATA_HORKAGE_NOSETXFER },
 
 	/* End Marker */
 	{ }
@@ -5480,7 +5481,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 	if (!ap)
 		return NULL;
 
-	ap->pflags |= ATA_PFLAG_INITIALIZING;
+	ap->pflags |= ATA_PFLAG_INITIALIZING | ATA_PFLAG_FROZEN;
 	ap->lock = &host->lock;
 	ap->print_id = -1;
 	ap->host = host;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 88cd22fa65cd..dad9fd660f37 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -3316,6 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 	struct ata_eh_context *ehc = &link->eh_context;
 	struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
 	enum ata_lpm_policy old_policy = link->lpm_policy;
+	bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
 	unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
 	unsigned int err_mask;
 	int rc;
@@ -3332,7 +3333,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 	 */
 	ata_for_each_dev(dev, link, ENABLED) {
 		bool hipm = ata_id_has_hipm(dev->id);
-		bool dipm = ata_id_has_dipm(dev->id);
+		bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;
 
 		/* find the first enabled and LPM enabled devices */
 		if (!link_dev)
@@ -3389,7 +3390,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 
 	/* host config updated, enable DIPM if transitioning to MIN_POWER */
 	ata_for_each_dev(dev, link, ENABLED) {
-		if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) {
+		if (policy == ATA_LPM_MIN_POWER && !no_dipm &&
+		    ata_id_has_dipm(dev->id)) {
 			err_mask = ata_dev_set_feature(dev,
 					SETFEATURES_SATA_ENABLE, SATA_DIPM);
 			if (err_mask && err_mask != AC_ERR_DEV) {
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index 0da0dcc7dd08..a5fdbdcb0faf 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -33,11 +33,12 @@
 
 
 #define DRV_NAME "pata_at91"
-#define DRV_VERSION "0.1"
+#define DRV_VERSION "0.2"
 
 #define CF_IDE_OFFSET	    0x00c00000
 #define CF_ALT_IDE_OFFSET   0x00e00000
 #define CF_IDE_RES_SIZE     0x08
+#define NCS_RD_PULSE_LIMIT  0x3f /* maximal value for pulse bitfields */
 
 struct at91_ide_info {
 	unsigned long mode;
@@ -49,8 +50,18 @@ struct at91_ide_info {
 	void __iomem *alt_addr;
 };
 
-static const struct ata_timing initial_timing =
-	{XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0};
+static const struct ata_timing initial_timing = {
+	.mode		= XFER_PIO_0,
+	.setup		= 70,
+	.act8b		= 290,
+	.rec8b		= 240,
+	.cyc8b		= 600,
+	.active		= 165,
+	.recover	= 150,
+	.dmack_hold	= 0,
+	.cycle		= 600,
+	.udma		= 0
+};
 
 static unsigned long calc_mck_cycles(unsigned long ns, unsigned long mck_hz)
 {
@@ -109,6 +120,11 @@ static void set_smc_timing(struct device *dev,
 	/* (CS0, CS1, DIR, OE) <= (CFCE1, CFCE2, CFRNW, NCSX) timings */
 	ncs_read_setup = 1;
 	ncs_read_pulse = read_cycle - 2;
+	if (ncs_read_pulse > NCS_RD_PULSE_LIMIT) {
+		ncs_read_pulse = NCS_RD_PULSE_LIMIT;
+		dev_warn(dev, "ncs_read_pulse limited to maximal value %lu\n",
+			ncs_read_pulse);
+	}
 
 	/* Write timings same as read timings */
 	write_cycle = read_cycle;
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index bdd2719f3f68..bc9e702186dd 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent)
 }
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id fore200e_sba_match[];
 static int __devinit fore200e_sba_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct fore200e_bus *bus;
 	struct fore200e *fore200e;
 	static int index = 0;
 	int err;
 
-	if (!op->dev.of_match)
+	match = of_match_device(fore200e_sba_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	bus = op->dev.of_match->data;
+	bus = match->data;
 
 	fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
 	if (!fore200e)
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index e9e5238f3106..d57e8d0fb823 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -168,11 +168,4 @@ config SYS_HYPERVISOR
 	bool
 	default n
 
-config ARCH_NO_SYSDEV_OPS
-	bool
-	---help---
-	  To be selected by architectures that don't use sysdev class or
-	  sysdev driver power management (suspend/resume) and shutdown
-	  operations.
-
 endmenu
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 19f49e41ce5d..a34dca0ad041 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -111,8 +111,6 @@ static inline int driver_match_device(struct device_driver *drv,
 	return drv->bus->match ? drv->bus->match(dev, drv) : 1;
 }
 
-extern void sysdev_shutdown(void);
-
 extern char *make_class_name(const char *name, struct kobject *kobj);
 
 extern int devres_release_all(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 81b78ede37c4..bc8729d603a7 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -400,7 +400,7 @@ static void device_remove_groups(struct device *dev,
 static int device_add_attrs(struct device *dev)
 {
 	struct class *class = dev->class;
-	struct device_type *type = dev->type;
+	const struct device_type *type = dev->type;
 	int error;
 
 	if (class) {
@@ -440,7 +440,7 @@ static int device_add_attrs(struct device *dev)
 static void device_remove_attrs(struct device *dev)
 {
 	struct class *class = dev->class;
-	struct device_type *type = dev->type;
+	const struct device_type *type = dev->type;
 
 	device_remove_groups(dev, dev->groups);
 
@@ -1314,8 +1314,7 @@ EXPORT_SYMBOL_GPL(put_device);
 EXPORT_SYMBOL_GPL(device_create_file);
 EXPORT_SYMBOL_GPL(device_remove_file);
 
-struct root_device
-{
+struct root_device {
 	struct device dev;
 	struct module *owner;
 };
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index da57ee9d63fe..6658da743c3a 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -245,6 +245,10 @@ int device_attach(struct device *dev)
 
 	device_lock(dev);
 	if (dev->driver) {
+		if (klist_node_attached(&dev->p->knode_driver)) {
+			ret = 1;
+			goto out_unlock;
+		}
 		ret = device_bind_driver(dev);
 		if (ret == 0)
 			ret = 1;
@@ -257,6 +261,7 @@ int device_attach(struct device *dev)
 		ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
 		pm_runtime_put_sync(dev);
 	}
+out_unlock:
 	device_unlock(dev);
 	return ret;
 }
@@ -316,8 +321,7 @@ static void __device_release_driver(struct device *dev)
 
 	drv = dev->driver;
 	if (drv) {
-		pm_runtime_get_noresume(dev);
-		pm_runtime_barrier(dev);
+		pm_runtime_get_sync(dev);
 
 		driver_sysfs_remove(dev);
 
@@ -326,6 +330,8 @@ static void __device_release_driver(struct device *dev)
 						     BUS_NOTIFY_UNBIND_DRIVER,
 						     dev);
 
+		pm_runtime_put_sync(dev);
+
 		if (dev->bus && dev->bus->remove)
 			dev->bus->remove(dev);
 		else if (drv->remove)
@@ -338,7 +344,6 @@ static void __device_release_driver(struct device *dev)
 						     BUS_NOTIFY_UNBOUND_DRIVER,
 						     dev);
 
-		pm_runtime_put_sync(dev);
 	}
 }
 
@@ -408,17 +413,16 @@ void *dev_get_drvdata(const struct device *dev)
 }
 EXPORT_SYMBOL(dev_get_drvdata);
 
-void dev_set_drvdata(struct device *dev, void *data)
+int dev_set_drvdata(struct device *dev, void *data)
 {
 	int error;
 
-	if (!dev)
-		return;
 	if (!dev->p) {
 		error = device_private_init(dev);
 		if (error)
-			return;
+			return error;
 	}
 	dev->p->driver_data = data;
+	return 0;
 }
 EXPORT_SYMBOL(dev_set_drvdata);
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8c798ef7f13f..bbb03e6f7255 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -521,6 +521,11 @@ static int _request_firmware(const struct firmware **firmware_p,
 	if (!firmware_p)
 		return -EINVAL;
 
+	if (WARN_ON(usermodehelper_is_disabled())) {
+		dev_err(device, "firmware: %s will not be loaded\n", name);
+		return -EBUSY;
+	}
+
 	*firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
 	if (!firmware) {
 		dev_err(device, "%s: kmalloc(struct firmware) failed\n",
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 3da6a43b7756..0a134a424a37 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -48,7 +48,8 @@ static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
 	return MEMORY_CLASS_NAME;
 }
 
-static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
+static int memory_uevent(struct kset *kset, struct kobject *obj,
+			struct kobj_uevent_env *env)
 {
 	int retval = 0;
 
@@ -228,10 +229,11 @@ int memory_isolate_notify(unsigned long val, void *v)
  * OK to have direct references to sparsemem variables in here.
  */
 static int
-memory_section_action(unsigned long phys_index, unsigned long action)
+memory_block_action(unsigned long phys_index, unsigned long action)
 {
 	int i;
 	unsigned long start_pfn, start_paddr;
+	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
 	struct page *first_page;
 	int ret;
 
@@ -243,7 +245,7 @@ memory_section_action(unsigned long phys_index, unsigned long action)
 	 * that way.
 	 */
 	if (action == MEM_ONLINE) {
-		for (i = 0; i < PAGES_PER_SECTION; i++) {
+		for (i = 0; i < nr_pages; i++) {
 			if (PageReserved(first_page+i))
 				continue;
 
@@ -257,12 +259,12 @@ memory_section_action(unsigned long phys_index, unsigned long action)
 	switch (action) {
 		case MEM_ONLINE:
 			start_pfn = page_to_pfn(first_page);
-			ret = online_pages(start_pfn, PAGES_PER_SECTION);
+			ret = online_pages(start_pfn, nr_pages);
 			break;
 		case MEM_OFFLINE:
 			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 			ret = remove_memory(start_paddr,
-					    PAGES_PER_SECTION << PAGE_SHIFT);
+					    nr_pages << PAGE_SHIFT);
 			break;
 		default:
 			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
@@ -276,7 +278,7 @@ memory_section_action(unsigned long phys_index, unsigned long action)
 static int memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	mutex_lock(&mem->state_mutex);
 
@@ -288,20 +290,11 @@ static int memory_block_change_state(struct memory_block *mem,
 	if (to_state == MEM_OFFLINE)
 		mem->state = MEM_GOING_OFFLINE;
 
-	for (i = 0; i < sections_per_block; i++) {
-		ret = memory_section_action(mem->start_section_nr + i,
-					    to_state);
-		if (ret)
-			break;
-	}
-
-	if (ret) {
-		for (i = 0; i < sections_per_block; i++)
-			memory_section_action(mem->start_section_nr + i,
-					      from_state_req);
+	ret = memory_block_action(mem->start_section_nr, to_state);
 
+	if (ret)
 		mem->state = from_state_req;
-	} else
+	else
 		mem->state = to_state;
 
 out:
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index f051cfff18af..1c291af637b3 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -149,6 +149,7 @@ static void platform_device_release(struct device *dev)
 
 	of_device_node_put(&pa->pdev.dev);
 	kfree(pa->pdev.dev.platform_data);
+	kfree(pa->pdev.mfd_cell);
 	kfree(pa->pdev.resource);
 	kfree(pa);
 }
@@ -191,18 +192,18 @@ EXPORT_SYMBOL_GPL(platform_device_alloc);
 int platform_device_add_resources(struct platform_device *pdev,
 				  const struct resource *res, unsigned int num)
 {
-	struct resource *r;
+	struct resource *r = NULL;
 
-	if (!res)
-		return 0;
-
-	r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL);
-	if (r) {
-		pdev->resource = r;
-		pdev->num_resources = num;
-		return 0;
+	if (res) {
+		r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
 	}
-	return -ENOMEM;
+
+	kfree(pdev->resource);
+	pdev->resource = r;
+	pdev->num_resources = num;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(platform_device_add_resources);
 
@@ -219,17 +220,17 @@ EXPORT_SYMBOL_GPL(platform_device_add_resources);
 int platform_device_add_data(struct platform_device *pdev, const void *data,
 			     size_t size)
 {
-	void *d;
+	void *d = NULL;
 
-	if (!data)
-		return 0;
-
-	d = kmemdup(data, size, GFP_KERNEL);
-	if (d) {
-		pdev->dev.platform_data = d;
-		return 0;
+	if (data) {
+		d = kmemdup(data, size, GFP_KERNEL);
+		if (!d)
+			return -ENOMEM;
 	}
-	return -ENOMEM;
+
+	kfree(pdev->dev.platform_data);
+	pdev->dev.platform_data = d;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(platform_device_add_data);
 
@@ -666,7 +667,7 @@ static int platform_legacy_resume(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_prepare(struct device *dev)
+int platform_pm_prepare(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -677,7 +678,7 @@ static int platform_pm_prepare(struct device *dev)
 	return ret;
 }
 
-static void platform_pm_complete(struct device *dev)
+void platform_pm_complete(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 
@@ -685,16 +686,11 @@ static void platform_pm_complete(struct device *dev)
 		drv->pm->complete(dev);
 }
 
-#else /* !CONFIG_PM_SLEEP */
-
-#define platform_pm_prepare		NULL
-#define platform_pm_complete		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
 
-int __weak platform_pm_suspend(struct device *dev)
+int platform_pm_suspend(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -712,7 +708,7 @@ int __weak platform_pm_suspend(struct device *dev)
 	return ret;
 }
 
-int __weak platform_pm_suspend_noirq(struct device *dev)
+int platform_pm_suspend_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -728,7 +724,7 @@ int __weak platform_pm_suspend_noirq(struct device *dev)
 	return ret;
 }
 
-int __weak platform_pm_resume(struct device *dev)
+int platform_pm_resume(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -746,7 +742,7 @@ int __weak platform_pm_resume(struct device *dev)
 	return ret;
 }
 
-int __weak platform_pm_resume_noirq(struct device *dev)
+int platform_pm_resume_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -762,18 +758,11 @@ int __weak platform_pm_resume_noirq(struct device *dev)
 	return ret;
 }
 
-#else /* !CONFIG_SUSPEND */
-
-#define platform_pm_suspend		NULL
-#define platform_pm_resume		NULL
-#define platform_pm_suspend_noirq	NULL
-#define platform_pm_resume_noirq	NULL
-
-#endif /* !CONFIG_SUSPEND */
+#endif /* CONFIG_SUSPEND */
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 
-static int platform_pm_freeze(struct device *dev)
+int platform_pm_freeze(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -791,7 +780,7 @@ static int platform_pm_freeze(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_freeze_noirq(struct device *dev)
+int platform_pm_freeze_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -807,7 +796,7 @@ static int platform_pm_freeze_noirq(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_thaw(struct device *dev)
+int platform_pm_thaw(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -825,7 +814,7 @@ static int platform_pm_thaw(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_thaw_noirq(struct device *dev)
+int platform_pm_thaw_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -841,7 +830,7 @@ static int platform_pm_thaw_noirq(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_poweroff(struct device *dev)
+int platform_pm_poweroff(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -859,7 +848,7 @@ static int platform_pm_poweroff(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_poweroff_noirq(struct device *dev)
+int platform_pm_poweroff_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -875,7 +864,7 @@ static int platform_pm_poweroff_noirq(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_restore(struct device *dev)
+int platform_pm_restore(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -893,7 +882,7 @@ static int platform_pm_restore(struct device *dev)
 	return ret;
 }
 
-static int platform_pm_restore_noirq(struct device *dev)
+int platform_pm_restore_noirq(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
@@ -909,62 +898,13 @@ static int platform_pm_restore_noirq(struct device *dev)
 	return ret;
 }
 
-#else /* !CONFIG_HIBERNATION */
-
-#define platform_pm_freeze		NULL
-#define platform_pm_thaw		NULL
-#define platform_pm_poweroff		NULL
-#define platform_pm_restore		NULL
-#define platform_pm_freeze_noirq	NULL
-#define platform_pm_thaw_noirq		NULL
-#define platform_pm_poweroff_noirq	NULL
-#define platform_pm_restore_noirq	NULL
-
-#endif /* !CONFIG_HIBERNATION */
-
-#ifdef CONFIG_PM_RUNTIME
-
-int __weak platform_pm_runtime_suspend(struct device *dev)
-{
-	return pm_generic_runtime_suspend(dev);
-};
-
-int __weak platform_pm_runtime_resume(struct device *dev)
-{
-	return pm_generic_runtime_resume(dev);
-};
-
-int __weak platform_pm_runtime_idle(struct device *dev)
-{
-	return pm_generic_runtime_idle(dev);
-};
-
-#else /* !CONFIG_PM_RUNTIME */
-
-#define platform_pm_runtime_suspend NULL
-#define platform_pm_runtime_resume NULL
-#define platform_pm_runtime_idle NULL
-
-#endif /* !CONFIG_PM_RUNTIME */
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 
 static const struct dev_pm_ops platform_dev_pm_ops = {
-	.prepare = platform_pm_prepare,
-	.complete = platform_pm_complete,
-	.suspend = platform_pm_suspend,
-	.resume = platform_pm_resume,
-	.freeze = platform_pm_freeze,
-	.thaw = platform_pm_thaw,
-	.poweroff = platform_pm_poweroff,
-	.restore = platform_pm_restore,
-	.suspend_noirq = platform_pm_suspend_noirq,
-	.resume_noirq = platform_pm_resume_noirq,
-	.freeze_noirq = platform_pm_freeze_noirq,
-	.thaw_noirq = platform_pm_thaw_noirq,
-	.poweroff_noirq = platform_pm_poweroff_noirq,
-	.restore_noirq = platform_pm_restore_noirq,
-	.runtime_suspend = platform_pm_runtime_suspend,
-	.runtime_resume = platform_pm_runtime_resume,
-	.runtime_idle = platform_pm_runtime_idle,
+	.runtime_suspend = pm_generic_runtime_suspend,
+	.runtime_resume = pm_generic_runtime_resume,
+	.runtime_idle = pm_generic_runtime_idle,
+	USE_PLATFORM_PM_SLEEP_OPS
 };
 
 struct bus_type platform_bus_type = {
@@ -976,41 +916,6 @@ struct bus_type platform_bus_type = {
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
-/**
- * platform_bus_get_pm_ops() - return pointer to busses dev_pm_ops
- *
- * This function can be used by platform code to get the current
- * set of dev_pm_ops functions used by the platform_bus_type.
- */
-const struct dev_pm_ops * __init platform_bus_get_pm_ops(void)
-{
-	return platform_bus_type.pm;
-}
-
-/**
- * platform_bus_set_pm_ops() - update dev_pm_ops for the platform_bus_type
- *
- * @pm: pointer to new dev_pm_ops struct to be used for platform_bus_type
- *
- * Platform code can override the dev_pm_ops methods of
- * platform_bus_type by using this function.  It is expected that
- * platform code will first do a platform_bus_get_pm_ops(), then
- * kmemdup it, then customize selected methods and pass a pointer to
- * the new struct dev_pm_ops to this function.
- *
- * Since platform-specific code is customizing methods for *all*
- * devices (not just platform-specific devices) it is expected that
- * any custom overrides of these functions will keep existing behavior
- * and simply extend it.  For example, any customization of the
- * runtime PM methods should continue to call the pm_generic_*
- * functions as the default ones do in addition to the
- * platform-specific behavior.
- */
-void __init platform_bus_set_pm_ops(const struct dev_pm_ops *pm)
-{
-	platform_bus_type.pm = pm;
-}
-
 int __init platform_bus_init(void)
 {
 	int error;
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 118c1b92a511..3647e114d0e7 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_OPP)	+= opp.o
+obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
-ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
-ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
+ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
+\ No newline at end of file
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
new file mode 100644
index 000000000000..c0dd09df7be8
--- /dev/null
+++ b/drivers/base/power/clock_ops.c
@@ -0,0 +1,431 @@
+/*
+ * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks
+ *
+ * Copyright (c) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+
+#ifdef CONFIG_PM_RUNTIME
+
+struct pm_runtime_clk_data {
+	struct list_head clock_list;
+	struct mutex lock;
+};
+
+enum pce_status {
+	PCE_STATUS_NONE = 0,
+	PCE_STATUS_ACQUIRED,
+	PCE_STATUS_ENABLED,
+	PCE_STATUS_ERROR,
+};
+
+struct pm_clock_entry {
+	struct list_head node;
+	char *con_id;
+	struct clk *clk;
+	enum pce_status status;
+};
+
+static struct pm_runtime_clk_data *__to_prd(struct device *dev)
+{
+	return dev ? dev->power.subsys_data : NULL;
+}
+
+/**
+ * pm_runtime_clk_add - Start using a device clock for runtime PM.
+ * @dev: Device whose clock is going to be used for runtime PM.
+ * @con_id: Connection ID of the clock.
+ *
+ * Add the clock represented by @con_id to the list of clocks used for
+ * the runtime PM of @dev.
+ */
+int pm_runtime_clk_add(struct device *dev, const char *con_id)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!prd)
+		return -EINVAL;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		dev_err(dev, "Not enough memory for clock entry.\n");
+		return -ENOMEM;
+	}
+
+	if (con_id) {
+		ce->con_id = kstrdup(con_id, GFP_KERNEL);
+		if (!ce->con_id) {
+			dev_err(dev,
+				"Not enough memory for clock connection ID.\n");
+			kfree(ce);
+			return -ENOMEM;
+		}
+	}
+
+	mutex_lock(&prd->lock);
+	list_add_tail(&ce->node, &prd->clock_list);
+	mutex_unlock(&prd->lock);
+	return 0;
+}
+
+/**
+ * __pm_runtime_clk_remove - Destroy runtime PM clock entry.
+ * @ce: Runtime PM clock entry to destroy.
+ *
+ * This routine must be called under the mutex protecting the runtime PM list
+ * of clocks corresponding to the @ce's device.
+ */
+static void __pm_runtime_clk_remove(struct pm_clock_entry *ce)
+{
+	if (!ce)
+		return;
+
+	list_del(&ce->node);
+
+	if (ce->status < PCE_STATUS_ERROR) {
+		if (ce->status == PCE_STATUS_ENABLED)
+			clk_disable(ce->clk);
+
+		if (ce->status >= PCE_STATUS_ACQUIRED)
+			clk_put(ce->clk);
+	}
+
+	/* kfree() accepts NULL, so con_id needs no separate check. */
+	kfree(ce->con_id);
+
+	kfree(ce);
+}
+
+/**
+ * pm_runtime_clk_remove - Stop using a device clock for runtime PM.
+ * @dev: Device whose clock should not be used for runtime PM any more.
+ * @con_id: Connection ID of the clock.
+ *
+ * Remove the clock represented by @con_id from the list of clocks used for
+ * the runtime PM of @dev.
+ */
+void pm_runtime_clk_remove(struct device *dev, const char *con_id)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!prd)
+		return;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry(ce, &prd->clock_list, node) {
+		if (!con_id && !ce->con_id) {
+			__pm_runtime_clk_remove(ce);
+			break;
+		} else if (!con_id || !ce->con_id) {
+			continue;
+		} else if (!strcmp(con_id, ce->con_id)) {
+			__pm_runtime_clk_remove(ce);
+			break;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+}
+
+/**
+ * pm_runtime_clk_init - Initialize a device's list of runtime PM clocks.
+ * @dev: Device to initialize the list of runtime PM clocks for.
+ *
+ * Allocate a struct pm_runtime_clk_data object, initialize its clock list and
+ * lock, and make the @dev's power.subsys_data field point to it.
+ */
+int pm_runtime_clk_init(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd;
+
+	prd = kzalloc(sizeof(*prd), GFP_KERNEL);
+	if (!prd) {
+		dev_err(dev, "Not enough memory for runtime PM data.\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&prd->clock_list);
+	mutex_init(&prd->lock);
+	dev->power.subsys_data = prd;
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_destroy - Destroy a device's list of runtime PM clocks.
+ * @dev: Device to destroy the list of runtime PM clocks for.
+ *
+ * Clear the @dev's power.subsys_data field, remove the list of clock entries
+ * from the struct pm_runtime_clk_data object pointed to by it before and free
+ * that object.
+ */
+void pm_runtime_clk_destroy(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce, *c;
+
+	if (!prd)
+		return;
+
+	dev->power.subsys_data = NULL;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry_safe_reverse(ce, c, &prd->clock_list, node)
+		__pm_runtime_clk_remove(ce);
+
+	mutex_unlock(&prd->lock);
+
+	kfree(prd);
+}
+
+/**
+ * pm_runtime_clk_acquire - Acquire a device clock.
+ * @dev: Device whose clock is to be acquired.
+ * @ce: Clock entry holding the connection ID; receives the clock and status.
+ */
+static void pm_runtime_clk_acquire(struct device *dev,
+				    struct pm_clock_entry *ce)
+{
+	ce->clk = clk_get(dev, ce->con_id);
+	if (IS_ERR(ce->clk)) {
+		ce->status = PCE_STATUS_ERROR;
+	} else {
+		ce->status = PCE_STATUS_ACQUIRED;
+		dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id);
+	}
+}
+
+/**
+ * pm_runtime_clk_suspend - Disable clocks in a device's runtime PM clock list.
+ * @dev: Device to disable the clocks for.
+ */
+int pm_runtime_clk_suspend(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!prd)
+		return 0;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry_reverse(ce, &prd->clock_list, node) {
+		if (ce->status == PCE_STATUS_NONE)
+			pm_runtime_clk_acquire(dev, ce);
+
+		if (ce->status < PCE_STATUS_ERROR) {
+			clk_disable(ce->clk);
+			ce->status = PCE_STATUS_ACQUIRED;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_resume - Enable clocks in a device's runtime PM clock list.
+ * @dev: Device to enable the clocks for.
+ */
+int pm_runtime_clk_resume(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!prd)
+		return 0;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry(ce, &prd->clock_list, node) {
+		if (ce->status == PCE_STATUS_NONE)
+			pm_runtime_clk_acquire(dev, ce);
+
+		if (ce->status < PCE_STATUS_ERROR) {
+			clk_enable(ce->clk);
+			ce->status = PCE_STATUS_ENABLED;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, the pwr_domain member of that object is copied to the device's
+ * pwr_domain field and its con_ids member is used to populate the device's list
+ * of runtime PM clocks, depending on @action.
+ *
+ * If the device's pwr_domain field is already populated with a value different
+ * from the one stored in the struct pm_clk_notifier_block object, the function
+ * does nothing.
+ */
+static int pm_runtime_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+	char **con_id;
+	int error;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (dev->pwr_domain)
+			break;
+
+		error = pm_runtime_clk_init(dev);
+		if (error)
+			break;
+
+		dev->pwr_domain = clknb->pwr_domain;
+		if (clknb->con_ids[0]) {
+			/* Walk the NULL-terminated array of IDs. */
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				pm_runtime_clk_add(dev, *con_id);
+		} else {
+			pm_runtime_clk_add(dev, NULL);
+		}
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (dev->pwr_domain != clknb->pwr_domain)
+			break;
+
+		dev->pwr_domain = NULL;
+		pm_runtime_clk_destroy(dev);
+		break;
+	}
+
+	return 0;
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+/**
+ * enable_clock - Enable a device clock.
+ * @dev: Device whose clock is to be enabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void enable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_enable(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced on.\n");
+	}
+}
+
+/**
+ * disable_clock - Disable a device clock.
+ * @dev: Device whose clock is to be disabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void disable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_disable(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced off.\n");
+	}
+}
+
+/**
+ * pm_runtime_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, each entry of its NULL-terminated con_ids array is used to
+ * enable or disable the corresponding device clock, depending on @action.
+ */
+static int pm_runtime_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+	char **con_id;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				enable_clock(dev, *con_id);
+		} else {
+			enable_clock(dev, NULL);
+		}
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids; *con_id; con_id++)
+				disable_clock(dev, *con_id);
+		} else {
+			disable_clock(dev, NULL);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+/**
+ * pm_runtime_clk_add_notifier - Add bus type notifier for runtime PM clocks.
+ * @bus: Bus type to add the notifier to.
+ * @clknb: Notifier to be added to the given bus type.
+ *
+ * The nb member of @clknb is not expected to be initialized and its
+ * notifier_call member will be replaced with pm_runtime_clk_notify().  However,
+ * the remaining members of @clknb should be populated prior to calling this
+ * routine.
+ */
+void pm_runtime_clk_add_notifier(struct bus_type *bus,
+				 struct pm_clk_notifier_block *clknb)
+{
+	if (!bus || !clknb)
+		return;
+
+	clknb->nb.notifier_call = pm_runtime_clk_notify;
+	bus_register_notifier(bus, &clknb->nb);
+}
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 42f97f925629..cb3bb368681c 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -74,6 +74,23 @@ EXPORT_SYMBOL_GPL(pm_generic_runtime_resume);
 
 #ifdef CONFIG_PM_SLEEP
 /**
+ * pm_generic_prepare - Generic routine preparing a device for power transition.
+ * @dev: Device to prepare.
+ *
+ * Prepare a device for a system-wide power transition.
+ */
+int pm_generic_prepare(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+
+	/* Without a driver ->prepare() callback there is nothing to do. */
+	if (!drv || !drv->pm || !drv->pm->prepare)
+		return 0;
+
+	return drv->pm->prepare(dev);
+}
+
+/**
  * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback.
  * @dev: Device to handle.
  * @event: PM transition of the system under way.
@@ -213,16 +230,38 @@ int pm_generic_restore(struct device *dev)
 	return __pm_generic_resume(dev, PM_EVENT_RESTORE);
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore);
+
+/**
+ * pm_generic_complete - Generic routine completing a device power transition.
+ * @dev: Device to handle.
+ *
+ * Run the driver's ->complete() callback, if any, then request a runtime idle.
+ */
+void pm_generic_complete(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	const struct dev_pm_ops *ops = drv ? drv->pm : NULL;
+
+	if (ops && ops->complete)
+		ops->complete(dev);
+
+	/*
+	 * Let runtime PM suspend devices that were unused before system sleep.
+	 */
+	pm_runtime_idle(dev);
+}
 #endif /* CONFIG_PM_SLEEP */
 
 struct dev_pm_ops generic_subsys_pm_ops = {
 #ifdef CONFIG_PM_SLEEP
+	.prepare = pm_generic_prepare,
 	.suspend = pm_generic_suspend,
 	.resume = pm_generic_resume,
 	.freeze = pm_generic_freeze,
 	.thaw = pm_generic_thaw,
 	.poweroff = pm_generic_poweroff,
 	.restore = pm_generic_restore,
+	.complete = pm_generic_complete,
 #endif
 #ifdef CONFIG_PM_RUNTIME
 	.runtime_suspend = pm_generic_runtime_suspend,
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 052dc53eef38..aa6320207745 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -63,6 +63,7 @@ void device_pm_init(struct device *dev)
 	dev->power.wakeup = NULL;
 	spin_lock_init(&dev->power.lock);
 	pm_runtime_init(dev);
+	INIT_LIST_HEAD(&dev->power.entry);
 }
 
 /**
@@ -233,7 +234,7 @@ static int pm_op(struct device *dev,
 		}
 		break;
 #endif /* CONFIG_SUSPEND */
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
 		if (ops->freeze) {
@@ -260,7 +261,7 @@ static int pm_op(struct device *dev,
 			suspend_report_result(ops->restore, error);
 		}
 		break;
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 	default:
 		error = -EINVAL;
 	}
@@ -308,7 +309,7 @@ static int pm_noirq_op(struct device *dev,
 		}
 		break;
 #endif /* CONFIG_SUSPEND */
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
 		if (ops->freeze_noirq) {
@@ -335,7 +336,7 @@ static int pm_noirq_op(struct device *dev,
 			suspend_report_result(ops->restore_noirq, error);
 		}
 		break;
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
 	default:
 		error = -EINVAL;
 	}
@@ -425,10 +426,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 
 	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "EARLY power domain ");
-		pm_noirq_op(dev, &dev->pwr_domain->ops, state);
-	}
-
-	if (dev->type && dev->type->pm) {
+		error = pm_noirq_op(dev, &dev->pwr_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "EARLY type ");
 		error = pm_noirq_op(dev, dev->type->pm, state);
 	} else if (dev->class && dev->class->pm) {
@@ -516,7 +515,8 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 
 	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "power domain ");
-		pm_op(dev, &dev->pwr_domain->ops, state);
+		error = pm_op(dev, &dev->pwr_domain->ops, state);
+		goto End;
 	}
 
 	if (dev->type && dev->type->pm) {
@@ -579,11 +579,13 @@ static bool is_async(struct device *dev)
  * Execute the appropriate "resume" callback for all devices whose status
  * indicates that they are suspended.
  */
-static void dpm_resume(pm_message_t state)
+void dpm_resume(pm_message_t state)
 {
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	might_sleep();
+
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
@@ -628,12 +630,11 @@ static void device_complete(struct device *dev, pm_message_t state)
 {
 	device_lock(dev);
 
-	if (dev->pwr_domain && dev->pwr_domain->ops.complete) {
+	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "completing power domain ");
-		dev->pwr_domain->ops.complete(dev);
-	}
-
-	if (dev->type && dev->type->pm) {
+		if (dev->pwr_domain->ops.complete)
+			dev->pwr_domain->ops.complete(dev);
+	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "completing type ");
 		if (dev->type->pm->complete)
 			dev->type->pm->complete(dev);
@@ -657,10 +658,12 @@ static void device_complete(struct device *dev, pm_message_t state)
  * Execute the ->complete() callbacks for all devices whose PM status is not
  * DPM_ON (this allows new devices to be registered).
  */
-static void dpm_complete(pm_message_t state)
+void dpm_complete(pm_message_t state)
 {
 	struct list_head list;
 
+	might_sleep();
+
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
 	while (!list_empty(&dpm_prepared_list)) {
@@ -689,7 +692,6 @@ static void dpm_complete(pm_message_t state)
  */
 void dpm_resume_end(pm_message_t state)
 {
-	might_sleep();
 	dpm_resume(state);
 	dpm_complete(state);
 }
@@ -731,7 +733,12 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error;
 
-	if (dev->type && dev->type->pm) {
+	if (dev->pwr_domain) {
+		pm_dev_dbg(dev, state, "LATE power domain ");
+		error = pm_noirq_op(dev, &dev->pwr_domain->ops, state);
+		if (error)
+			return error;
+	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "LATE type ");
 		error = pm_noirq_op(dev, dev->type->pm, state);
 		if (error)
@@ -748,11 +755,6 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 			return error;
 	}
 
-	if (dev->pwr_domain) {
-		pm_dev_dbg(dev, state, "LATE power domain ");
-		pm_noirq_op(dev, &dev->pwr_domain->ops, state);
-	}
-
 	return 0;
 }
 
@@ -840,21 +842,27 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		goto End;
 	}
 
+	if (dev->pwr_domain) {
+		pm_dev_dbg(dev, state, "power domain ");
+		error = pm_op(dev, &dev->pwr_domain->ops, state);
+		goto End;
+	}
+
 	if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "type ");
 		error = pm_op(dev, dev->type->pm, state);
-		goto Domain;
+		goto End;
 	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
 			pm_dev_dbg(dev, state, "class ");
 			error = pm_op(dev, dev->class->pm, state);
-			goto Domain;
+			goto End;
 		} else if (dev->class->suspend) {
 			pm_dev_dbg(dev, state, "legacy class ");
 			error = legacy_suspend(dev, state, dev->class->suspend);
-			goto Domain;
+			goto End;
 		}
 	}
 
@@ -868,12 +876,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		}
 	}
 
- Domain:
-	if (!error && dev->pwr_domain) {
-		pm_dev_dbg(dev, state, "power domain ");
-		pm_op(dev, &dev->pwr_domain->ops, state);
-	}
-
  End:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
@@ -913,11 +915,13 @@ static int device_suspend(struct device *dev)
  * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
  * @state: PM transition of the system being carried out.
  */
-static int dpm_suspend(pm_message_t state)
+int dpm_suspend(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	might_sleep();
+
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
@@ -964,7 +968,14 @@ static int device_prepare(struct device *dev, pm_message_t state)
 
 	device_lock(dev);
 
-	if (dev->type && dev->type->pm) {
+	if (dev->pwr_domain) {
+		pm_dev_dbg(dev, state, "preparing power domain ");
+		if (dev->pwr_domain->ops.prepare)
+			error = dev->pwr_domain->ops.prepare(dev);
+		suspend_report_result(dev->pwr_domain->ops.prepare, error);
+		if (error)
+			goto End;
+	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "preparing type ");
 		if (dev->type->pm->prepare)
 			error = dev->type->pm->prepare(dev);
@@ -983,13 +994,6 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		if (dev->bus->pm->prepare)
 			error = dev->bus->pm->prepare(dev);
 		suspend_report_result(dev->bus->pm->prepare, error);
-		if (error)
-			goto End;
-	}
-
-	if (dev->pwr_domain && dev->pwr_domain->ops.prepare) {
-		pm_dev_dbg(dev, state, "preparing power domain ");
-		dev->pwr_domain->ops.prepare(dev);
 	}
 
  End:
@@ -1004,10 +1008,12 @@ static int device_prepare(struct device *dev, pm_message_t state)
  *
  * Execute the ->prepare() callback(s) for all devices.
  */
-static int dpm_prepare(pm_message_t state)
+int dpm_prepare(pm_message_t state)
 {
 	int error = 0;
 
+	might_sleep();
+
 	mutex_lock(&dpm_list_mtx);
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.next);
@@ -1056,7 +1062,6 @@ int dpm_suspend_start(pm_message_t state)
 {
 	int error;
 
-	might_sleep();
 	error = dpm_prepare(state);
 	if (!error)
 		error = dpm_suspend(state);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 3172c60d23a9..0d4587b15c55 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -168,7 +168,6 @@ static int rpm_check_suspend_allowed(struct device *dev)
 static int rpm_idle(struct device *dev, int rpmflags)
 {
 	int (*callback)(struct device *);
-	int (*domain_callback)(struct device *);
 	int retval;
 
 	retval = rpm_check_suspend_allowed(dev);
@@ -214,7 +213,9 @@ static int rpm_idle(struct device *dev, int rpmflags)
 
 	dev->power.idle_notification = true;
 
-	if (dev->type && dev->type->pm)
+	if (dev->pwr_domain)
+		callback = dev->pwr_domain->ops.runtime_idle;
+	else if (dev->type && dev->type->pm)
 		callback = dev->type->pm->runtime_idle;
 	else if (dev->class && dev->class->pm)
 		callback = dev->class->pm->runtime_idle;
@@ -223,19 +224,10 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	else
 		callback = NULL;
 
-	if (dev->pwr_domain)
-		domain_callback = dev->pwr_domain->ops.runtime_idle;
-	else
-		domain_callback = NULL;
-
-	if (callback || domain_callback) {
+	if (callback) {
 		spin_unlock_irq(&dev->power.lock);
 
-		if (domain_callback)
-			retval = domain_callback(dev);
-
-		if (!retval && callback)
-			callback(dev);
+		callback(dev);
 
 		spin_lock_irq(&dev->power.lock);
 	}
@@ -382,7 +374,9 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 
 	__update_runtime_status(dev, RPM_SUSPENDING);
 
-	if (dev->type && dev->type->pm)
+	if (dev->pwr_domain)
+		callback = dev->pwr_domain->ops.runtime_suspend;
+	else if (dev->type && dev->type->pm)
 		callback = dev->type->pm->runtime_suspend;
 	else if (dev->class && dev->class->pm)
 		callback = dev->class->pm->runtime_suspend;
@@ -400,8 +394,6 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 		else
 			pm_runtime_cancel_pending(dev);
 	} else {
-		if (dev->pwr_domain)
-			rpm_callback(dev->pwr_domain->ops.runtime_suspend, dev);
  no_callback:
 		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_deactivate_timer(dev);
@@ -582,9 +574,8 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	__update_runtime_status(dev, RPM_RESUMING);
 
 	if (dev->pwr_domain)
-		rpm_callback(dev->pwr_domain->ops.runtime_resume, dev);
-
-	if (dev->type && dev->type->pm)
+		callback = dev->pwr_domain->ops.runtime_resume;
+	else if (dev->type && dev->type->pm)
 		callback = dev->type->pm->runtime_resume;
 	else if (dev->class && dev->class->pm)
 		callback = dev->class->pm->runtime_resume;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index fff49bee781d..a9f5b8979611 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -212,8 +212,9 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev,
 static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
 		autosuspend_delay_ms_store);
 
-#endif
+#endif /* CONFIG_PM_RUNTIME */
 
+#ifdef CONFIG_PM_SLEEP
 static ssize_t
 wake_show(struct device * dev, struct device_attribute *attr, char * buf)
 {
@@ -248,7 +249,6 @@ wake_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
-#ifdef CONFIG_PM_SLEEP
 static ssize_t wakeup_count_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 4573c83df6dd..84f7c7d5a098 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -110,7 +110,6 @@ void wakeup_source_add(struct wakeup_source *ws)
 	spin_lock_irq(&events_lock);
 	list_add_rcu(&ws->entry, &wakeup_sources);
 	spin_unlock_irq(&events_lock);
-	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(wakeup_source_add);
 
@@ -258,7 +257,7 @@ void device_set_wakeup_capable(struct device *dev, bool capable)
 	if (!!dev->power.can_wakeup == !!capable)
 		return;
 
-	if (device_is_registered(dev)) {
+	if (device_is_registered(dev) && !list_empty(&dev->power.entry)) {
 		if (capable) {
 			if (wakeup_sysfs_add(dev))
 				return;
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index acde9b5ee131..9dff77bfe1e3 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -328,203 +328,8 @@ void sysdev_unregister(struct sys_device *sysdev)
 	kobject_put(&sysdev->kobj);
 }
 
-
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-/**
- *	sysdev_shutdown - Shut down all system devices.
- *
- *	Loop over each class of system devices, and the devices in each
- *	of those classes. For each device, we call the shutdown method for
- *	each driver registered for the device - the auxiliaries,
- *	and the class driver.
- *
- *	Note: The list is iterated in reverse order, so that we shut down
- *	child devices before we shut down their parents. The list ordering
- *	is guaranteed by virtue of the fact that child devices are registered
- *	after their parents.
- */
-void sysdev_shutdown(void)
-{
-	struct sysdev_class *cls;
-
-	pr_debug("Shutting Down System Devices\n");
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Shutting down type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			struct sysdev_driver *drv;
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxiliary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->shutdown)
-					drv->shutdown(sysdev);
-			}
-
-			/* Now call the generic one */
-			if (cls->shutdown)
-				cls->shutdown(sysdev);
-		}
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-}
-
-static void __sysdev_resume(struct sys_device *dev)
-{
-	struct sysdev_class *cls = dev->cls;
-	struct sysdev_driver *drv;
-
-	/* First, call the class-specific one */
-	if (cls->resume)
-		cls->resume(dev);
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled after %pF\n", cls->resume);
-
-	/* Call auxiliary drivers next. */
-	list_for_each_entry(drv, &cls->drivers, entry) {
-		if (drv->resume)
-			drv->resume(dev);
-		WARN_ONCE(!irqs_disabled(),
-			"Interrupts enabled after %pF\n", drv->resume);
-	}
-}
-
-/**
- *	sysdev_suspend - Suspend all system devices.
- *	@state:		Power state to enter.
- *
- *	We perform an almost identical operation as sysdev_shutdown()
- *	above, though calling ->suspend() instead. Interrupts are disabled
- *	when this called. Devices are responsible for both saving state and
- *	quiescing or powering down the device.
- *
- *	This is only called by the device PM core, so we let them handle
- *	all synchronization.
- */
-int sysdev_suspend(pm_message_t state)
-{
-	struct sysdev_class *cls;
-	struct sys_device *sysdev, *err_dev;
-	struct sysdev_driver *drv, *err_drv;
-	int ret;
-
-	pr_debug("Checking wake-up interrupts\n");
-
-	/* Return error code if there are any wake-up interrupts pending */
-	ret = check_wakeup_irqs();
-	if (ret)
-		return ret;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while suspending system devices\n");
-
-	pr_debug("Suspending System Devices\n");
-
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		pr_debug("Suspending type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxiliary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->suspend) {
-					ret = drv->suspend(sysdev, state);
-					if (ret)
-						goto aux_driver;
-				}
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					drv->suspend);
-			}
-
-			/* Now call the generic one */
-			if (cls->suspend) {
-				ret = cls->suspend(sysdev, state);
-				if (ret)
-					goto cls_driver;
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					cls->suspend);
-			}
-		}
-	}
-	return 0;
-	/* resume current sysdev */
-cls_driver:
-	drv = NULL;
-	printk(KERN_ERR "Class suspend failed for %s: %d\n",
-		kobject_name(&sysdev->kobj), ret);
-
-aux_driver:
-	if (drv)
-		printk(KERN_ERR "Class driver suspend failed for %s: %d\n",
-				kobject_name(&sysdev->kobj), ret);
-	list_for_each_entry(err_drv, &cls->drivers, entry) {
-		if (err_drv == drv)
-			break;
-		if (err_drv->resume)
-			err_drv->resume(sysdev);
-	}
-
-	/* resume other sysdevs in current class */
-	list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-		if (err_dev == sysdev)
-			break;
-		pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-		__sysdev_resume(err_dev);
-	}
-
-	/* resume other classes */
-	list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) {
-		list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-			__sysdev_resume(err_dev);
-		}
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sysdev_suspend);
-
-/**
- *	sysdev_resume - Bring system devices back to life.
- *
- *	Similar to sysdev_suspend(), but we iterate the list forwards
- *	to guarantee that parent devices are resumed before their children.
- *
- *	Note: Interrupts are disabled when called.
- */
-int sysdev_resume(void)
-{
-	struct sysdev_class *cls;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while resuming system devices\n");
-
-	pr_debug("Resuming System Devices\n");
-
-	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Resuming type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			__sysdev_resume(sysdev);
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(sysdev_resume);
-#endif /* CONFIG_ARCH_NO_SYSDEV_OPS */
+EXPORT_SYMBOL_GPL(sysdev_register);
+EXPORT_SYMBOL_GPL(sysdev_unregister);
 
 int __init system_bus_init(void)
 {
@@ -534,9 +339,6 @@ int __init system_bus_init(void)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
 #define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
 
 ssize_t sysdev_store_ulong(struct sys_device *sysdev,
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index 90af2943f9e4..c126db3cb7d1 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -73,6 +73,7 @@ int syscore_suspend(void)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(syscore_suspend);
 
 /**
  * syscore_resume - Execute all the registered system core resume callbacks.
@@ -95,6 +96,7 @@ void syscore_resume(void)
 				"Interrupts enabled after %pF\n", ops->resume);
 		}
 }
+EXPORT_SYMBOL_GPL(syscore_resume);
 #endif /* CONFIG_PM_SLEEP */
 
 /**
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 8066d086578a..e086fbbbe853 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2547,7 +2547,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
 	disk->major = MajorNumber;
 	disk->first_minor = n << DAC960_MaxPartitionsBits;
 	disk->fops = &DAC960_BlockDeviceOperations;
-	disk->events = DISK_EVENT_MEDIA_CHANGE;
    }
   /*
     Indicate the Block Device Registration completed successfully,
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 456c0cc90dcf..8eba86bba599 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1736,7 +1736,6 @@ static int __init fd_probe_drives(void)
 		disk->major = FLOPPY_MAJOR;
 		disk->first_minor = drive;
 		disk->fops = &floppy_fops;
-		disk->events = DISK_EVENT_MEDIA_CHANGE;
 		sprintf(disk->disk_name, "fd%d", drive);
 		disk->private_data = &unit[drive];
 		set_capacity(disk, 880*2);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index c871eae14120..ede16c64ff07 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1964,7 +1964,6 @@ static int __init atari_floppy_init (void)
 		unit[i].disk->first_minor = i;
 		sprintf(unit[i].disk->disk_name, "fd%d", i);
 		unit[i].disk->fops = &floppy_fops;
-		unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE;
 		unit[i].disk->private_data = &unit[i];
 		unit[i].disk->queue = blk_init_queue(do_fd_request,
 					&ataflop_lock);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 301d7a9a41a6..db8f88586c8d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4205,7 +4205,6 @@ static int __init floppy_init(void)
 		disks[dr]->major = FLOPPY_MAJOR;
 		disks[dr]->first_minor = TOMINOR(dr);
 		disks[dr]->fops = &floppy_fops;
-		disks[dr]->events = DISK_EVENT_MEDIA_CHANGE;
 		sprintf(disks[dr]->disk_name, "fd%d", dr);
 
 		init_timer(&motor_off_timer[dr]);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 2f2ccf686251..8690e31d9932 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -320,7 +320,6 @@ static void pcd_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
-		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 21dfdb776869..869e7676d46f 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -837,7 +837,6 @@ static void pd_probe_drive(struct pd_unit *disk)
 	p->fops = &pd_fops;
 	p->major = major;
 	p->first_minor = (disk - pd) << PD_BITS;
-	p->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->gd = p;
 	p->private_data = disk;
 	p->queue = pd_queue;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 7adeb1edbf43..f21b520ef419 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -294,7 +294,6 @@ static void __init pf_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, pf->name);
 		disk->fops = &pf_fops;
-		disk->events = DISK_EVENT_MEDIA_CHANGE;
 		if (!(*drives[unit])[D_PRT])
 			pf_drive_count++;
 	}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 16dc3645291c..9712fad82bc6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -92,6 +92,8 @@ struct rbd_client {
 	struct list_head	node;
 };
 
+struct rbd_req_coll;
+
 /*
  * a single io request
  */
@@ -100,6 +102,24 @@ struct rbd_request {
 	struct bio		*bio;		/* cloned bio */
 	struct page		**pages;	/* list of used pages */
 	u64			len;
+	int			coll_index;
+	struct rbd_req_coll	*coll;
+};
+
+struct rbd_req_status {
+	int done;	/* nonzero once a result was recorded for this slot */
+	int rc;		/* result code handed to the block layer */
+	u64 bytes;	/* byte count handed to the block layer */
+};
+
+/*
+ * a collection of requests
+ */
+struct rbd_req_coll {
+	int			total;		/* number of status slots */
+	int			num_done;	/* slots already ended, in order */
+	struct kref		kref;
+	struct rbd_req_status	status[];	/* one slot per request */
+};
 
 struct rbd_snap {
@@ -416,6 +436,17 @@ static void rbd_put_client(struct rbd_device *rbd_dev)
 	rbd_dev->client = NULL;
 }
 
+/*
+ * kref release callback: free a requests collection
+ */
+static void rbd_coll_release(struct kref *kref)
+{
+	struct rbd_req_coll *c;
+
+	c = container_of(kref, struct rbd_req_coll, kref);
+	dout("rbd_coll_release %p\n", c);
+	kfree(c);
+}
 
 /*
  * Create a new header structure, translate header format from the on-disk
@@ -590,6 +621,14 @@ static u64 rbd_get_segment(struct rbd_image_header *header,
 	return len;
 }
 
+static int rbd_get_num_segments(struct rbd_image_header *header,
+				u64 ofs, u64 len)
+{
+	u64 last = ofs + len - 1;	/* last byte of the range */
+	u64 span = (last >> header->obj_order) - (ofs >> header->obj_order);
+	return span + 1;
+}
+
 /*
  * bio helpers
  */
@@ -735,6 +774,50 @@ static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
 	kfree(ops);
 }
 
+static void rbd_coll_end_req_index(struct request *rq,
+				   struct rbd_req_coll *coll,
+				   int index,
+				   int ret, u64 len)
+{
+	struct request_queue *q;
+	int min, max, i;
+
+	dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
+	     coll, index, ret, len);
+
+	if (!rq)	/* nothing to complete without a block request */
+		return;
+
+	if (!coll) {	/* not tracked by a collection: end it directly */
+		blk_end_request(rq, ret, len);
+		return;
+	}
+
+	q = rq->q;
+	/* Record this slot's result, then end the leading run of done slots. */
+	spin_lock_irq(q->queue_lock);
+	coll->status[index].done = 1;
+	coll->status[index].rc = ret;
+	coll->status[index].bytes = len;
+	max = min = coll->num_done;	/* first slot not yet ended */
+	while (max < coll->total && coll->status[max].done)
+		max++;
+
+	for (i = min; i<max; i++) {
+		__blk_end_request(rq, coll->status[i].rc,
+				  coll->status[i].bytes);
+		coll->num_done++;
+		kref_put(&coll->kref, rbd_coll_release);
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+
+static void rbd_coll_end_req(struct rbd_request *req, int ret, u64 len)
+{
+	/* Forward the block request, collection and slot stored in @req. */
+	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
+}
+
 /*
  * Send ceph osd request
  */
@@ -749,6 +832,8 @@ static int rbd_do_request(struct request *rq,
 			  int flags,
 			  struct ceph_osd_req_op *ops,
 			  int num_reply,
+			  struct rbd_req_coll *coll,
+			  int coll_index,
 			  void (*rbd_cb)(struct ceph_osd_request *req,
 					 struct ceph_msg *msg),
 			  struct ceph_osd_request **linger_req,
@@ -763,12 +848,20 @@ static int rbd_do_request(struct request *rq,
 	struct ceph_osd_request_head *reqhead;
 	struct rbd_image_header *header = &dev->header;
 
-	ret = -ENOMEM;
 	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
-	if (!req_data)
-		goto done;
+	if (!req_data) {
+		if (coll)
+			rbd_coll_end_req_index(rq, coll, coll_index,
+					       -ENOMEM, len);
+		return -ENOMEM;
+	}
 
-	dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs);
+	if (coll) {
+		req_data->coll = coll;
+		req_data->coll_index = coll_index;
+	}
+
+	dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, ofs, len);
 
 	down_read(&header->snap_rwsem);
 
@@ -777,9 +870,9 @@ static int rbd_do_request(struct request *rq,
 				      ops,
 				      false,
 				      GFP_NOIO, pages, bio);
-	if (IS_ERR(req)) {
+	if (!req) {
 		up_read(&header->snap_rwsem);
-		ret = PTR_ERR(req);
+		ret = -ENOMEM;
 		goto done_pages;
 	}
 
@@ -828,7 +921,8 @@ static int rbd_do_request(struct request *rq,
 		ret = ceph_osdc_wait_request(&dev->client->osdc, req);
 		if (ver)
 			*ver = le64_to_cpu(req->r_reassert_version.version);
-		dout("reassert_ver=%lld\n", le64_to_cpu(req->r_reassert_version.version));
+		dout("reassert_ver=%lld\n",
+		     le64_to_cpu(req->r_reassert_version.version));
 		ceph_osdc_put_request(req);
 	}
 	return ret;
@@ -837,10 +931,8 @@ done_err:
 	bio_chain_put(req_data->bio);
 	ceph_osdc_put_request(req);
 done_pages:
+	rbd_coll_end_req(req_data, ret, len);
 	kfree(req_data);
-done:
-	if (rq)
-		blk_end_request(rq, ret, len);
 	return ret;
 }
 
@@ -874,7 +966,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
 		bytes = req_data->len;
 	}
 
-	blk_end_request(req_data->rq, rc, bytes);
+	rbd_coll_end_req(req_data, rc, bytes);
 
 	if (req_data->bio)
 		bio_chain_put(req_data->bio);
@@ -934,6 +1026,7 @@ static int rbd_req_sync_op(struct rbd_device *dev,
 			  flags,
 			  ops,
 			  2,
+			  NULL, 0,
 			  NULL,
 			  linger_req, ver);
 	if (ret < 0)
@@ -959,7 +1052,9 @@ static int rbd_do_op(struct request *rq,
 		     u64 snapid,
 		     int opcode, int flags, int num_reply,
 		     u64 ofs, u64 len,
-		     struct bio *bio)
+		     struct bio *bio,
+		     struct rbd_req_coll *coll,
+		     int coll_index)
 {
 	char *seg_name;
 	u64 seg_ofs;
@@ -995,7 +1090,10 @@ static int rbd_do_op(struct request *rq,
 			     flags,
 			     ops,
 			     num_reply,
+			     coll, coll_index,
 			     rbd_req_cb, 0, NULL);
+
+	rbd_destroy_ops(ops);
 done:
 	kfree(seg_name);
 	return ret;
@@ -1008,13 +1106,15 @@ static int rbd_req_write(struct request *rq,
 			 struct rbd_device *rbd_dev,
 			 struct ceph_snap_context *snapc,
 			 u64 ofs, u64 len,
-			 struct bio *bio)
+			 struct bio *bio,
+			 struct rbd_req_coll *coll,
+			 int coll_index)
 {
 	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
 			 CEPH_OSD_OP_WRITE,
 			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
 			 2,
-			 ofs, len, bio);
+			 ofs, len, bio, coll, coll_index);
 }
 
 /*
@@ -1024,14 +1124,16 @@ static int rbd_req_read(struct request *rq,
 			 struct rbd_device *rbd_dev,
 			 u64 snapid,
 			 u64 ofs, u64 len,
-			 struct bio *bio)
+			 struct bio *bio,
+			 struct rbd_req_coll *coll,
+			 int coll_index)
 {
 	return rbd_do_op(rq, rbd_dev, NULL,
 			 (snapid ? snapid : CEPH_NOSNAP),
 			 CEPH_OSD_OP_READ,
 			 CEPH_OSD_FLAG_READ,
 			 2,
-			 ofs, len, bio);
+			 ofs, len, bio, coll, coll_index);
 }
 
 /*
@@ -1063,7 +1165,9 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
 {
 	struct ceph_osd_req_op *ops;
 	struct page **pages = NULL;
-	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
+	int ret;
+
+	ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
 	if (ret < 0)
 		return ret;
 
@@ -1077,6 +1181,7 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
 			  CEPH_OSD_FLAG_READ,
 			  ops,
 			  1,
+			  NULL, 0,
 			  rbd_simple_req_cb, 0, NULL);
 
 	rbd_destroy_ops(ops);
@@ -1274,6 +1379,20 @@ static int rbd_req_sync_exec(struct rbd_device *dev,
 	return ret;
 }
 
+static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
+{
+	struct rbd_req_coll *coll;
+	/* Collection header followed by one status slot per request. */
+	size_t size = sizeof(*coll) + sizeof(coll->status[0]) * num_reqs;
+
+	coll = kzalloc(size, GFP_ATOMIC);
+	if (coll) {
+		coll->total = num_reqs;
+		kref_init(&coll->kref);
+	}
+	return coll;
+}
+
 /*
  * block device queue callback
  */
@@ -1291,6 +1410,8 @@ static void rbd_rq_fn(struct request_queue *q)
 		bool do_write;
 		int size, op_size = 0;
 		u64 ofs;
+		int num_segs, cur_seg = 0;
+		struct rbd_req_coll *coll;
 
 		/* peek at request from block layer */
 		if (!rq)
@@ -1321,6 +1442,14 @@ static void rbd_rq_fn(struct request_queue *q)
 		     do_write ? "write" : "read",
 		     size, blk_rq_pos(rq) * 512ULL);
 
+		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
+		coll = rbd_alloc_coll(num_segs);
+		if (!coll) {
+			spin_lock_irq(q->queue_lock);
+			__blk_end_request_all(rq, -ENOMEM);
+			goto next;
+		}
+
 		do {
 			/* a bio clone to be passed down to OSD req */
 			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
@@ -1328,35 +1457,41 @@ static void rbd_rq_fn(struct request_queue *q)
 						  rbd_dev->header.block_name,
 						  ofs, size,
 						  NULL, NULL);
+			kref_get(&coll->kref);
 			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
 					      op_size, GFP_ATOMIC);
 			if (!bio) {
-				spin_lock_irq(q->queue_lock);
-				__blk_end_request_all(rq, -ENOMEM);
-				goto next;
+				rbd_coll_end_req_index(rq, coll, cur_seg,
+						       -ENOMEM, op_size);
+				goto next_seg;
 			}
 
+
 			/* init OSD command: write or read */
 			if (do_write)
 				rbd_req_write(rq, rbd_dev,
 					      rbd_dev->header.snapc,
 					      ofs,
-					      op_size, bio);
+					      op_size, bio,
+					      coll, cur_seg);
 			else
 				rbd_req_read(rq, rbd_dev,
 					     cur_snap_id(rbd_dev),
 					     ofs,
-					     op_size, bio);
+					     op_size, bio,
+					     coll, cur_seg);
 
+next_seg:
 			size -= op_size;
 			ofs += op_size;
 
+			cur_seg++;
 			rq_bio = next_bio;
 		} while (size > 0);
+		kref_put(&coll->kref, rbd_coll_release);
 
 		if (bp)
 			bio_pair_release(bp);
-
 		spin_lock_irq(q->queue_lock);
 next:
 		rq = blk_fetch_request(q);
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 24a482f2fbd6..fd5adcd55944 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -858,7 +858,6 @@ static int __devinit swim_floppy_init(struct swim_priv *swd)
 		swd->unit[drive].disk->first_minor = drive;
 		sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
 		swd->unit[drive].disk->fops = &floppy_fops;
-		swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
 		swd->unit[drive].disk->private_data = &swd->unit[drive];
 		swd->unit[drive].disk->queue = swd->queue;
 		set_capacity(swd->unit[drive].disk, 2880);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 4c10f56facbf..773bfa792777 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1163,7 +1163,6 @@ static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = i;
 	disk->fops = &floppy_fops;
-	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = &floppy_states[i];
 	disk->queue = swim3_queue;
 	disk->flags |= GENHD_FL_REMOVABLE;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 68b9430c7cfe..0e376d46bdd1 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2334,7 +2334,6 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	disk->major = UB_MAJOR;
 	disk->first_minor = lun->id * UB_PARTS_PER_LUN;
 	disk->fops = &ub_bd_fops;
-	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = lun;
 	disk->driverfs_dev = &sc->intf->dev;
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 645ff765cd12..6c7fd7db6dff 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -1005,7 +1005,6 @@ static int __devinit ace_setup(struct ace_device *ace)
 	ace->gd->major = ace_major;
 	ace->gd->first_minor = ace->id * ACE_NUM_MINORS;
 	ace->gd->fops = &ace_fops;
-	ace->gd->events = DISK_EVENT_MEDIA_CHANGE;
 	ace->gd->queue = ace->queue;
 	ace->gd->private_data = ace;
 	snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a');
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 514dd8efaf73..75fb965b8f72 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -986,6 +986,9 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t
 
 	cdinfo(CD_OPEN, "entering cdrom_open\n"); 
 
+	/* open is an event synchronization point, so check events first */
+	check_disk_change(bdev);
+
 	/* if this was a O_NONBLOCK open and we should honor the flags,
 	 * do a quick open without drive/disc integrity checks. */
 	cdi->use_count++;
@@ -1012,9 +1015,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t
 
 	cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n",
 			cdi->name, cdi->use_count);
-	/* Do this on open.  Don't wait for mount, because they might
-	    not be mounting, but opening with O_NONBLOCK */
-	check_disk_change(bdev);
 	return 0;
 err_release:
 	if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index b2b034fea34e..3ceaf006e7f0 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -803,7 +803,6 @@ static int __devinit probe_gdrom(struct platform_device *devptr)
 		goto probe_fail_cdrom_register;
 	}
 	gd.disk->fops = &gdrom_bdops;
-	gd.disk->events = DISK_EVENT_MEDIA_CHANGE;
 	/* latch on to the interrupt */
 	err = gdrom_set_interrupt_handlers();
 	if (err)
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 4e874c5fa605..e427fbe45999 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -626,7 +626,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
 	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
-	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
 	d->viocd_disk = gendisk;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index ad59b4e0a9b5..49502bc5360a 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -523,7 +523,7 @@ config RAW_DRIVER
           with the O_DIRECT flag.
 
 config MAX_RAW_DEVS
-	int "Maximum number of RAW devices to support (1-8192)"
+	int "Maximum number of RAW devices to support (1-65536)"
 	depends on RAW_DRIVER
 	default "256"
 	help
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 012cba0d6d96..b072648dc3f6 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -115,6 +115,9 @@ static struct agp_memory *agp_create_user_memory(unsigned long num_agp_pages)
 	struct agp_memory *new;
 	unsigned long alloc_size = num_agp_pages*sizeof(struct page *);
 
+	if (INT_MAX/sizeof(struct page *) < num_agp_pages)
+		return NULL;
+
 	new = kzalloc(sizeof(struct agp_memory), GFP_KERNEL);
 	if (new == NULL)
 		return NULL;
@@ -234,11 +237,14 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge,
 	int scratch_pages;
 	struct agp_memory *new;
 	size_t i;
+	int cur_memory;
 
 	if (!bridge)
 		return NULL;
 
-	if ((atomic_read(&bridge->current_memory_agp) + page_count) > bridge->max_memory_agp)
+	cur_memory = atomic_read(&bridge->current_memory_agp);
+	if ((cur_memory + page_count > bridge->max_memory_agp) ||
+	    (cur_memory + page_count < page_count))
 		return NULL;
 
 	if (type >= AGP_USER_TYPES) {
@@ -1089,8 +1095,8 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type)
 		return -EINVAL;
 	}
 
-	/* AK: could wrap */
-	if ((pg_start + mem->page_count) > num_entries)
+	if (((pg_start + mem->page_count) > num_entries) ||
+	    ((pg_start + mem->page_count) < pg_start))
 		return -EINVAL;
 
 	j = pg_start;
@@ -1124,7 +1130,7 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
 {
 	size_t i;
 	struct agp_bridge_data *bridge;
-	int mask_type;
+	int mask_type, num_entries;
 
 	bridge = mem->bridge;
 	if (!bridge)
@@ -1136,6 +1142,11 @@ int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
 	if (type != mem->type)
 		return -EINVAL;
 
+	num_entries = agp_num_entries();
+	if (((pg_start + mem->page_count) > num_entries) ||
+	    ((pg_start + mem->page_count) < pg_start))
+		return -EINVAL;
+
 	mask_type = bridge->driver->agp_type_to_mask_type(bridge, type);
 	if (mask_type != 0) {
 		/* The generic routines know nothing of memory types */
diff --git a/drivers/char/bsr.c b/drivers/char/bsr.c
index a4a6c2f044b5..cf39bc08ce08 100644
--- a/drivers/char/bsr.c
+++ b/drivers/char/bsr.c
@@ -295,7 +295,7 @@ static int bsr_create_devs(struct device_node *bn)
 static int __init bsr_init(void)
 {
 	struct device_node *np;
-	dev_t bsr_dev = MKDEV(bsr_major, 0);
+	dev_t bsr_dev;
 	int ret = -ENODEV;
 	int result;
 
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 7066e801b9d3..051474c65b78 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -84,8 +84,6 @@ static struct clocksource clocksource_hpet = {
 	.rating		= 250,
 	.read		= read_hpet,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 0,		/* to be calculated */
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static struct clocksource *hpet_clocksource;
@@ -934,9 +932,7 @@ int hpet_alloc(struct hpet_data *hdp)
 	if (!hpet_clocksource) {
 		hpet_mctr = (void __iomem *)&hpetp->hp_hpet->hpet_mc;
 		CLKSRC_FSYS_MMIO_SET(clocksource_hpet.fsys_mmio, hpet_mctr);
-		clocksource_hpet.mult = clocksource_hz2mult(hpetp->hp_tick_freq,
-						clocksource_hpet.shift);
-		clocksource_register(&clocksource_hpet);
+		clocksource_register_hz(&clocksource_hpet, hpetp->hp_tick_freq);
 		hpetp->hp_clocksource = &clocksource_hpet;
 		hpet_clocksource = &clocksource_hpet;
 	}
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 43ac61978d8b..ac6739e085e3 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void)
 		pr_info("%s", version);
 }
 
+static const struct of_device_id n2rng_match[];
 static int __devinit n2rng_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	int victoria_falls;
 	int err = -ENOMEM;
 	struct n2rng *np;
 
-	if (!op->dev.of_match)
+	match = of_match_device(n2rng_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	victoria_falls = (op->dev.of_match->data != NULL);
+	victoria_falls = (match->data != NULL);
 
 	n2rng_driver_version();
 	np = kzalloc(sizeof(*np), GFP_KERNEL);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index cc6c9b2546a3..64c6b8530615 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = {
 };
 #endif /* CONFIG_PCI */
 
+static struct of_device_id ipmi_match[];
 static int __devinit ipmi_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_OF
+	const struct of_device_id *match;
 	struct smi_info *info;
 	struct resource resource;
 	const __be32 *regsize, *regspacing, *regshift;
@@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev)
 
 	dev_info(&dev->dev, "probing via device tree\n");
 
-	if (!dev->dev.of_match)
+	match = of_match_device(ipmi_match, &dev->dev);
+	if (!match)
 		return -EINVAL;
 
 	ret = of_address_to_resource(np, 0, &resource);
@@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev)
 		return -ENOMEM;
 	}
 
-	info->si_type		= (enum si_type) dev->dev.of_match->data;
+	info->si_type		= (enum si_type) match->data;
 	info->addr_source	= SI_DEVICETREE;
 	info->irq_setup		= std_irq_setup;
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 436a99017998..8fc04b4f311f 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -806,29 +806,41 @@ static const struct file_operations oldmem_fops = {
 };
 #endif
 
-static ssize_t kmsg_write(struct file *file, const char __user *buf,
-			  size_t count, loff_t *ppos)
+static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv,
+			   unsigned long count, loff_t pos)
 {
-	char *tmp;
-	ssize_t ret;
+	char *line, *p;
+	int i;
+	ssize_t ret = -EFAULT;
+	size_t len = iov_length(iv, count);
 
-	tmp = kmalloc(count + 1, GFP_KERNEL);
-	if (tmp == NULL)
+	line = kmalloc(len + 1, GFP_KERNEL);
+	if (line == NULL)
 		return -ENOMEM;
-	ret = -EFAULT;
-	if (!copy_from_user(tmp, buf, count)) {
-		tmp[count] = 0;
-		ret = printk("%s", tmp);
-		if (ret > count)
-			/* printk can add a prefix */
-			ret = count;
+
+	/*
+	 * copy all vectors into a single string, to ensure we do
+	 * not interleave our log line with other printk calls
+	 */
+	p = line;
+	for (i = 0; i < count; i++) {
+		if (copy_from_user(p, iv[i].iov_base, iv[i].iov_len))
+			goto out;
+		p += iv[i].iov_len;
 	}
-	kfree(tmp);
+	p[0] = '\0';
+
+	ret = printk("%s", line);
+	/* printk can add a prefix */
+	if (ret > len)
+		ret = len;
+out:
+	kfree(line);
 	return ret;
 }
 
 static const struct file_operations kmsg_fops = {
-	.write = kmsg_write,
+	.aio_write = kmsg_writev,
 	.llseek = noop_llseek,
 };
 
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index b4b9d5a47885..b33e8ea314ed 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -21,6 +21,7 @@
 #include <linux/mutex.h>
 #include <linux/gfp.h>
 #include <linux/compat.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
@@ -30,10 +31,15 @@ struct raw_device_data {
 };
 
 static struct class *raw_class;
-static struct raw_device_data raw_devices[MAX_RAW_MINORS];
+static struct raw_device_data *raw_devices;
 static DEFINE_MUTEX(raw_mutex);
 static const struct file_operations raw_ctl_fops; /* forward declaration */
 
+static int max_raw_minors = MAX_RAW_MINORS;
+
+module_param(max_raw_minors, int, 0);
+MODULE_PARM_DESC(max_raw_minors, "Maximum number of raw devices (1-65536)");
+
 /*
  * Open/close code for raw IO.
  *
@@ -125,7 +131,7 @@ static int bind_set(int number, u64 major, u64 minor)
 	struct raw_device_data *rawdev;
 	int err = 0;
 
-	if (number <= 0 || number >= MAX_RAW_MINORS)
+	if (number <= 0 || number >= max_raw_minors)
 		return -EINVAL;
 
 	if (MAJOR(dev) != major || MINOR(dev) != minor)
@@ -312,14 +318,27 @@ static int __init raw_init(void)
 	dev_t dev = MKDEV(RAW_MAJOR, 0);
 	int ret;
 
-	ret = register_chrdev_region(dev, MAX_RAW_MINORS, "raw");
+	if (max_raw_minors < 1 || max_raw_minors > 65536) {
+		printk(KERN_WARNING "raw: invalid max_raw_minors (must be"
+			" between 1 and 65536), using %d\n", MAX_RAW_MINORS);
+		max_raw_minors = MAX_RAW_MINORS;
+	}
+
+	raw_devices = vmalloc(sizeof(struct raw_device_data) * max_raw_minors);
+	if (!raw_devices) {
+		printk(KERN_ERR "Not enough memory for raw device structures\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+	memset(raw_devices, 0, sizeof(struct raw_device_data) * max_raw_minors);
+
+	ret = register_chrdev_region(dev, max_raw_minors, "raw");
 	if (ret)
 		goto error;
 
 	cdev_init(&raw_cdev, &raw_fops);
-	ret = cdev_add(&raw_cdev, dev, MAX_RAW_MINORS);
+	ret = cdev_add(&raw_cdev, dev, max_raw_minors);
 	if (ret) {
-		kobject_put(&raw_cdev.kobj);
 		goto error_region;
 	}
 
@@ -336,8 +355,9 @@ static int __init raw_init(void)
 	return 0;
 
 error_region:
-	unregister_chrdev_region(dev, MAX_RAW_MINORS);
+	unregister_chrdev_region(dev, max_raw_minors);
 error:
+	vfree(raw_devices);
 	return ret;
 }
 
@@ -346,7 +366,7 @@ static void __exit raw_exit(void)
 	device_destroy(raw_class, MKDEV(RAW_MAJOR, 0));
 	class_destroy(raw_class);
 	cdev_del(&raw_cdev);
-	unregister_chrdev_region(MKDEV(RAW_MAJOR, 0), MAX_RAW_MINORS);
+	unregister_chrdev_region(MKDEV(RAW_MAJOR, 0), max_raw_minors);
 }
 
 module_init(raw_init);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 84b164d1eb2b..838568a7dbf5 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1280,18 +1280,7 @@ static void unplug_port(struct port *port)
 		spin_lock_irq(&pdrvdata_lock);
 		list_del(&port->cons.list);
 		spin_unlock_irq(&pdrvdata_lock);
-#if 0
-		/*
-		 * hvc_remove() not called as removing one hvc port
-		 * results in other hvc ports getting frozen.
-		 *
-		 * Once this is resolved in hvc, this functionality
-		 * will be enabled.  Till that is done, the -EPIPE
-		 * return from get_chars() above will help
-		 * hvc_console.c to clean up on ports we remove here.
-		 */
 		hvc_remove(port->cons.hvc);
-#endif
 	}
 
 	/* Remove unused data this port might have received. */
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index d6412c16385f..39ccdeada791 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev)
 }
 
 #ifdef CONFIG_OF
-static int __devinit hwicap_of_probe(struct platform_device *op)
+static int __devinit hwicap_of_probe(struct platform_device *op,
+				     const struct hwicap_driver_config *config)
 {
 	struct resource res;
 	const unsigned int *id;
 	const char *family;
 	int rc;
-	const struct hwicap_driver_config *config = op->dev.of_match->data;
 	const struct config_registers *regs;
 
 
@@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op)
 			regs);
 }
 #else
-static inline int hwicap_of_probe(struct platform_device *op)
+static inline int hwicap_of_probe(struct platform_device *op,
+				  const struct hwicap_driver_config *config)
 {
 	return -EINVAL;
 }
 #endif /* CONFIG_OF */
 
+static const struct of_device_id __devinitconst hwicap_of_match[];
 static int __devinit hwicap_drv_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *match;
 	struct resource *res;
 	const struct config_registers *regs;
 	const char *family;
 
-	if (pdev->dev.of_match)
-		return hwicap_of_probe(pdev);
+	match = of_match_device(hwicap_of_match, &pdev->dev);
+	if (match)
+		return hwicap_of_probe(pdev, match->data);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
index 0fc0a79852de..6db161f64ae0 100644
--- a/drivers/clk/clkdev.c
+++ b/drivers/clk/clkdev.c
@@ -32,10 +32,9 @@ static DEFINE_MUTEX(clocks_mutex);
  * Then we take the most specific entry - with the following
  * order of precedence: dev+con > dev only > con only.
  */
-static struct clk *clk_find(const char *dev_id, const char *con_id)
+static struct clk_lookup *clk_find(const char *dev_id, const char *con_id)
 {
-	struct clk_lookup *p;
-	struct clk *clk = NULL;
+	struct clk_lookup *p, *cl = NULL;
 	int match, best = 0;
 
 	list_for_each_entry(p, &clocks, node) {
@@ -52,27 +51,27 @@ static struct clk *clk_find(const char *dev_id, const char *con_id)
 		}
 
 		if (match > best) {
-			clk = p->clk;
+			cl = p;
 			if (match != 3)
 				best = match;
 			else
 				break;
 		}
 	}
-	return clk;
+	return cl;
 }
 
 struct clk *clk_get_sys(const char *dev_id, const char *con_id)
 {
-	struct clk *clk;
+	struct clk_lookup *cl;
 
 	mutex_lock(&clocks_mutex);
-	clk = clk_find(dev_id, con_id);
-	if (clk && !__clk_get(clk))
-		clk = NULL;
+	cl = clk_find(dev_id, con_id);
+	if (cl && !__clk_get(cl->clk))
+		cl = NULL;
 	mutex_unlock(&clocks_mutex);
 
-	return clk ? clk : ERR_PTR(-ENOENT);
+	return cl ? cl->clk : ERR_PTR(-ENOENT);
 }
 EXPORT_SYMBOL(clk_get_sys);
 
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
new file mode 100644
index 000000000000..110aeeb52f9a
--- /dev/null
+++ b/drivers/clocksource/Kconfig
@@ -0,0 +1,2 @@
+config CLKSRC_I8253
+	bool
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index be61ece6330b..cfb6383b543a 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC)	+= cs5535-clockevt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
 obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
+obj-$(CONFIG_CLKSRC_I8253)	+= i8253.o
diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c
index 64e528e8bfa6..72f811f73e9c 100644
--- a/drivers/clocksource/cyclone.c
+++ b/drivers/clocksource/cyclone.c
@@ -29,8 +29,6 @@ static struct clocksource clocksource_cyclone = {
 	.rating		= 250,
 	.read		= read_cyclone,
 	.mask		= CYCLONE_TIMER_MASK,
-	.mult		= 10,
-	.shift		= 0,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -108,12 +106,8 @@ static int __init init_cyclone_clocksource(void)
 	}
 	cyclone_ptr = cyclone_timer;
 
-	/* sort out mult/shift values: */
-	clocksource_cyclone.shift = 22;
-	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
-						clocksource_cyclone.shift);
-
-	return clocksource_register(&clocksource_cyclone);
+	return clocksource_register_hz(&clocksource_cyclone,
+					CYCLONE_TIMER_FREQ);
 }
 
 arch_initcall(init_cyclone_clocksource);
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
new file mode 100644
index 000000000000..225c1761b372
--- /dev/null
+++ b/drivers/clocksource/i8253.c
@@ -0,0 +1,88 @@
+/*
+ * i8253 PIT clocksource
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/timex.h>
+
+#include <asm/i8253.h>
+
+/*
+ * Since the PIT overflows every tick, it's not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t i8253_read(struct clocksource *cs)
+{
+	static int old_count;
+	static u32 old_jifs;
+	unsigned long flags;
+	int count;
+	u32 jifs;
+
+	raw_spin_lock_irqsave(&i8253_lock, flags);
+	/*
+	 * Although our caller may have the read side of xtime_lock,
+	 * this is now a seqlock, and we are cheating in this routine
+	 * by having side effects on state that we cannot undo if
+	 * there is a collision on the seqlock and our caller has to
+	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
+	 * jiffies as volatile despite the lock.  We read jiffies
+	 * before latching the timer count to guarantee that although
+	 * the jiffies value might be older than the count (that is,
+	 * the counter may underflow between the last point where
+	 * jiffies was incremented and the point where we latch the
+	 * count), it cannot be newer.
+	 */
+	jifs = jiffies;
+	outb_pit(0x00, PIT_MODE);	/* latch the count ASAP */
+	count = inb_pit(PIT_CH0);	/* read the latched count */
+	count |= inb_pit(PIT_CH0) << 8;
+
+	/* VIA686a test code... reset the latch if count > max + 1 */
+	if (count > LATCH) {
+		outb_pit(0x34, PIT_MODE);
+		outb_pit(PIT_LATCH & 0xff, PIT_CH0);
+		outb_pit(PIT_LATCH >> 8, PIT_CH0);
+		count = PIT_LATCH - 1;
+	}
+
+	/*
+	 * It's possible for count to appear to go the wrong way for a
+	 * couple of reasons:
+	 *
+	 *  1. The timer counter underflows, but we haven't handled the
+	 *     resulting interrupt and incremented jiffies yet.
+	 *  2. Hardware problem with the timer, not giving us continuous time,
+	 *     the counter does small "jumps" upwards on some Pentium systems,
+	 *     (see c't 95/10 page 335 for Neptun bug.)
+	 *
+	 * Previous attempts to handle these cases intelligently were
+	 * buggy, so we just do the simple thing now.
+	 */
+	if (count > old_count && jifs == old_jifs)
+		count = old_count;
+
+	old_count = count;
+	old_jifs = jifs;
+
+	raw_spin_unlock_irqrestore(&i8253_lock, flags);
+
+	count = (PIT_LATCH - 1) - count;
+
+	return (cycle_t)(jifs * PIT_LATCH) + count;
+}
+
+static struct clocksource i8253_cs = {
+	.name		= "pit",
+	.rating		= 110,
+	.read		= i8253_read,
+	.mask		= CLOCKSOURCE_MASK(32),
+};
+
+int __init clocksource_i8253_init(void)
+{
+	return clocksource_register_hz(&i8253_cs, PIT_TICK_RATE);
+}
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index ca8ee8093d6c..9fb84853d8e3 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -1,3 +1,5 @@
+menu "CPU Frequency scaling"
+
 config CPU_FREQ
 	bool "CPU Frequency scaling"
 	help
@@ -18,19 +20,6 @@ if CPU_FREQ
 config CPU_FREQ_TABLE
 	tristate
 
-config CPU_FREQ_DEBUG
-	bool "Enable CPUfreq debugging"
-	help
-	  Say Y here to enable CPUfreq subsystem (including drivers)
-	  debugging. You will need to activate it via the kernel
-	  command line by passing
-	     cpufreq.debug=<value>
-
-	  To get <value>, add 
-	       1 to activate CPUfreq core debugging,
-	       2 to activate CPUfreq drivers debugging, and
-	       4 to activate CPUfreq governor debugging
-
 config CPU_FREQ_STAT
 	tristate "CPU frequency translation statistics"
 	select CPU_FREQ_TABLE
@@ -190,4 +179,10 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 	  If in doubt, say N.
 
-endif	# CPU_FREQ
+menu "x86 CPU frequency scaling drivers"
+depends on X86
+source "drivers/cpufreq/Kconfig.x86"
+endmenu
+
+endif
+endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/drivers/cpufreq/Kconfig.x86
index 870e6cc6ad28..78ff7ee48951 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig.x86
@@ -1,15 +1,7 @@
 #
-# CPU Frequency scaling
+# x86 CPU Frequency scaling drivers
 #
 
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
 config X86_PCC_CPUFREQ
 	tristate "Processor Clocking Control interface driver"
 	depends on ACPI && ACPI_PROCESSOR
@@ -43,7 +35,7 @@ config X86_ACPI_CPUFREQ
 config ELAN_CPUFREQ
 	tristate "AMD Elan SC400 and SC410"
 	select CPU_FREQ_TABLE
-	depends on X86_ELAN
+	depends on MELAN
 	---help---
 	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
 	  processors.
@@ -59,7 +51,7 @@ config ELAN_CPUFREQ
 config SC520_CPUFREQ
 	tristate "AMD Elan SC520"
 	select CPU_FREQ_TABLE
-	depends on X86_ELAN
+	depends on MELAN
 	---help---
 	  This adds the CPUFreq driver for AMD Elan SC520 processor.
 
@@ -261,6 +253,3 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK
 	  option lets the probing code bypass some of those checks if the
 	  parameter "relaxed_check=1" is passed to the module.
 
-endif	# CPU_FREQ
-
-endmenu
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 71fc3b4173f1..c7f1a6f16b6e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -13,3 +13,29 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
 
+##################################################################################
+# x86 drivers.
+# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
+# K8 systems. ACPI is preferred to all other hardware-specific drivers.
+# speedstep-* is preferred over p4-clockmod.
+
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
+obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
+obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
+obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
+obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
+obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
+obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
+obj-$(CONFIG_X86_LONGRUN)		+= longrun.o
+obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
+obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
+obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
+obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
+obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
+obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
+
+##################################################################################
+
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index a2baafb2fe6d..4e04e1274388 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -47,9 +47,6 @@
 #include <asm/cpufeature.h>
 #include "mperf.h"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"acpi-cpufreq", msg)
-
 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
@@ -233,7 +230,7 @@ static u32 get_cur_val(const struct cpumask *mask)
 	cmd.mask = mask;
 	drv_read(&cmd);
 
-	dprintk("get_cur_val = %u\n", cmd.val);
+	pr_debug("get_cur_val = %u\n", cmd.val);
 
 	return cmd.val;
 }
@@ -244,7 +241,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	unsigned int freq;
 	unsigned int cached_freq;
 
-	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
+	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
 
 	if (unlikely(data == NULL ||
 		     data->acpi_data == NULL || data->freq_table == NULL)) {
@@ -261,7 +258,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 		data->resume = 1;
 	}
 
-	dprintk("cur freq = %u\n", freq);
+	pr_debug("cur freq = %u\n", freq);
 
 	return freq;
 }
@@ -293,7 +290,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int i;
 	int result = 0;
 
-	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
 	if (unlikely(data == NULL ||
 	     data->acpi_data == NULL || data->freq_table == NULL)) {
@@ -313,11 +310,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	next_perf_state = data->freq_table[next_state].index;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n",
+			pr_debug("Called after resume, resetting to P%d\n",
 				next_perf_state);
 			data->resume = 0;
 		} else {
-			dprintk("Already at target state (P%d)\n",
+			pr_debug("Already at target state (P%d)\n",
 				next_perf_state);
 			goto out;
 		}
@@ -357,7 +354,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 	if (acpi_pstate_strict) {
 		if (!check_freqs(cmd.mask, freqs.new, data)) {
-			dprintk("acpi_cpufreq_target failed (%d)\n",
+			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
 			result = -EAGAIN;
 			goto out;
@@ -378,7 +375,7 @@ static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_verify\n");
+	pr_debug("acpi_cpufreq_verify\n");
 
 	return cpufreq_frequency_table_verify(policy, data->freq_table);
 }
@@ -433,11 +430,11 @@ static void free_acpi_perf_data(void)
 static int __init acpi_cpufreq_early_init(void)
 {
 	unsigned int i;
-	dprintk("acpi_cpufreq_early_init\n");
+	pr_debug("acpi_cpufreq_early_init\n");
 
 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
 	if (!acpi_perf_data) {
-		dprintk("Memory allocation error for acpi_perf_data.\n");
+		pr_debug("Memory allocation error for acpi_perf_data.\n");
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
@@ -519,7 +516,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	static int blacklisted;
 #endif
 
-	dprintk("acpi_cpufreq_cpu_init\n");
+	pr_debug("acpi_cpufreq_cpu_init\n");
 
 #ifdef CONFIG_SMP
 	if (blacklisted)
@@ -566,7 +563,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	/* capability check */
 	if (perf->state_count <= 1) {
-		dprintk("No P-States\n");
+		pr_debug("No P-States\n");
 		result = -ENODEV;
 		goto err_unreg;
 	}
@@ -578,11 +575,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	switch (perf->control_register.space_id) {
 	case ACPI_ADR_SPACE_SYSTEM_IO:
-		dprintk("SYSTEM IO addr space\n");
+		pr_debug("SYSTEM IO addr space\n");
 		data->cpu_feature = SYSTEM_IO_CAPABLE;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		dprintk("HARDWARE addr space\n");
+		pr_debug("HARDWARE addr space\n");
 		if (!check_est_cpu(cpu)) {
 			result = -ENODEV;
 			goto err_unreg;
@@ -590,7 +587,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
 		break;
 	default:
-		dprintk("Unknown addr space %d\n",
+		pr_debug("Unknown addr space %d\n",
 			(u32) (perf->control_register.space_id));
 		result = -ENODEV;
 		goto err_unreg;
@@ -661,9 +658,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (cpu_has(c, X86_FEATURE_APERFMPERF))
 		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
 
-	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
+	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
 			(i == perf->state ? '*' : ' '), i,
 			(u32) perf->states[i].core_frequency,
 			(u32) perf->states[i].power,
@@ -694,7 +691,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_cpu_exit\n");
+	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
@@ -712,7 +709,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_resume\n");
+	pr_debug("acpi_cpufreq_resume\n");
 
 	data->resume = 1;
 
@@ -743,7 +740,7 @@ static int __init acpi_cpufreq_init(void)
 	if (acpi_disabled)
 		return 0;
 
-	dprintk("acpi_cpufreq_init\n");
+	pr_debug("acpi_cpufreq_init\n");
 
 	ret = acpi_cpufreq_early_init();
 	if (ret)
@@ -758,7 +755,7 @@ static int __init acpi_cpufreq_init(void)
 
 static void __exit acpi_cpufreq_exit(void)
 {
-	dprintk("acpi_cpufreq_exit\n");
+	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index 141abebc4516..7bac808804f3 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -57,8 +57,6 @@ MODULE_PARM_DESC(min_fsb,
 		"Minimum FSB to use, if not defined: current FSB - 50");
 
 #define PFX "cpufreq-nforce2: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"cpufreq-nforce2", msg)
 
 /**
  * nforce2_calc_fsb - calculate FSB
@@ -270,7 +268,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return 0;
 
-	dprintk("Old CPU frequency %d kHz, new %d kHz\n",
+	pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
 	       freqs.old, freqs.new);
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
@@ -282,7 +280,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
 			target_fsb);
 	else
-		dprintk("Changed FSB successfully to %d\n",
+		pr_debug("Changed FSB successfully to %d\n",
 			target_fsb);
 
 	/* Enable IRQs */
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2dafc5c38ae7..0a5bea9e3585 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -32,9 +32,6 @@
 
 #include <trace/events/power.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
-						"cpufreq-core", msg)
-
 /**
  * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
@@ -181,93 +178,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 
 
 /*********************************************************************
- *                     UNIFIED DEBUG HELPERS                         *
- *********************************************************************/
-#ifdef CONFIG_CPU_FREQ_DEBUG
-
-/* what part(s) of the CPUfreq subsystem are debugged? */
-static unsigned int debug;
-
-/* is the debug output ratelimit'ed using printk_ratelimit? User can
- * set or modify this value.
- */
-static unsigned int debug_ratelimit = 1;
-
-/* is the printk_ratelimit'ing enabled? It's enabled after a successful
- * loading of a cpufreq driver, temporarily disabled when a new policy
- * is set, and disabled upon cpufreq driver removal
- */
-static unsigned int disable_ratelimit = 1;
-static DEFINE_SPINLOCK(disable_ratelimit_lock);
-
-static void cpufreq_debug_enable_ratelimit(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&disable_ratelimit_lock, flags);
-	if (disable_ratelimit)
-		disable_ratelimit--;
-	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-}
-
-static void cpufreq_debug_disable_ratelimit(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&disable_ratelimit_lock, flags);
-	disable_ratelimit++;
-	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-}
-
-void cpufreq_debug_printk(unsigned int type, const char *prefix,
-			const char *fmt, ...)
-{
-	char s[256];
-	va_list args;
-	unsigned int len;
-	unsigned long flags;
-
-	WARN_ON(!prefix);
-	if (type & debug) {
-		spin_lock_irqsave(&disable_ratelimit_lock, flags);
-		if (!disable_ratelimit && debug_ratelimit
-					&& !printk_ratelimit()) {
-			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-			return;
-		}
-		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-
-		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
-
-		va_start(args, fmt);
-		len += vsnprintf(&s[len], (256 - len), fmt, args);
-		va_end(args);
-
-		printk(s);
-
-		WARN_ON(len < 5);
-	}
-}
-EXPORT_SYMBOL(cpufreq_debug_printk);
-
-
-module_param(debug, uint, 0644);
-MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
-			" 2 to debug drivers, and 4 to debug governors.");
-
-module_param(debug_ratelimit, uint, 0644);
-MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
-					" set to 0 to disable ratelimiting.");
-
-#else /* !CONFIG_CPU_FREQ_DEBUG */
-
-static inline void cpufreq_debug_enable_ratelimit(void) { return; }
-static inline void cpufreq_debug_disable_ratelimit(void) { return; }
-
-#endif /* CONFIG_CPU_FREQ_DEBUG */
-
-
-/*********************************************************************
  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
  *********************************************************************/
 
@@ -291,7 +201,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 	if (!l_p_j_ref_freq) {
 		l_p_j_ref = loops_per_jiffy;
 		l_p_j_ref_freq = ci->old;
-		dprintk("saving %lu as reference value for loops_per_jiffy; "
+		pr_debug("saving %lu as reference value for loops_per_jiffy; "
 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
 	}
 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
@@ -299,7 +209,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
 								ci->new);
-		dprintk("scaling loops_per_jiffy to %lu "
+		pr_debug("scaling loops_per_jiffy to %lu "
 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
 	}
 }
@@ -326,7 +236,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 	BUG_ON(irqs_disabled());
 
 	freqs->flags = cpufreq_driver->flags;
-	dprintk("notification %u of frequency transition to %u kHz\n",
+	pr_debug("notification %u of frequency transition to %u kHz\n",
 		state, freqs->new);
 
 	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
@@ -340,7 +250,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
 			if ((policy) && (policy->cpu == freqs->cpu) &&
 			    (policy->cur) && (policy->cur != freqs->old)) {
-				dprintk("Warning: CPU frequency is"
+				pr_debug("Warning: CPU frequency is"
 					" %u, cpufreq assumed %u kHz.\n",
 					freqs->old, policy->cur);
 				freqs->old = policy->cur;
@@ -353,7 +263,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
-		dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
+		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
 			(unsigned long)freqs->cpu);
 		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 		trace_cpu_frequency(freqs->new, freqs->cpu);
@@ -411,21 +321,14 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 		t = __find_governor(str_governor);
 
 		if (t == NULL) {
-			char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
-								str_governor);
-
-			if (name) {
-				int ret;
+			int ret;
 
-				mutex_unlock(&cpufreq_governor_mutex);
-				ret = request_module("%s", name);
-				mutex_lock(&cpufreq_governor_mutex);
+			mutex_unlock(&cpufreq_governor_mutex);
+			ret = request_module("cpufreq_%s", str_governor);
+			mutex_lock(&cpufreq_governor_mutex);
 
-				if (ret == 0)
-					t = __find_governor(str_governor);
-			}
-
-			kfree(name);
+			if (ret == 0)
+				t = __find_governor(str_governor);
 		}
 
 		if (t != NULL) {
@@ -753,7 +656,7 @@ no_policy:
 static void cpufreq_sysfs_release(struct kobject *kobj)
 {
 	struct cpufreq_policy *policy = to_policy(kobj);
-	dprintk("last reference is dropped\n");
+	pr_debug("last reference is dropped\n");
 	complete(&policy->kobj_unregister);
 }
 
@@ -788,7 +691,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu,
 	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
 	if (gov) {
 		policy->governor = gov;
-		dprintk("Restoring governor %s for cpu %d\n",
+		pr_debug("Restoring governor %s for cpu %d\n",
 		       policy->governor->name, cpu);
 	}
 #endif
@@ -824,7 +727,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu,
 			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-			dprintk("CPU already managed, adding link\n");
+			pr_debug("CPU already managed, adding link\n");
 			ret = sysfs_create_link(&sys_dev->kobj,
 						&managed_policy->kobj,
 						"cpufreq");
@@ -865,7 +768,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
 		if (!cpu_online(j))
 			continue;
 
-		dprintk("CPU %u already managed, adding link\n", j);
+		pr_debug("CPU %u already managed, adding link\n", j);
 		managed_policy = cpufreq_cpu_get(cpu);
 		cpu_sys_dev = get_cpu_sysdev(j);
 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
@@ -941,7 +844,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 	policy->user_policy.governor = policy->governor;
 
 	if (ret) {
-		dprintk("setting policy failed\n");
+		pr_debug("setting policy failed\n");
 		if (cpufreq_driver->exit)
 			cpufreq_driver->exit(policy);
 	}
@@ -977,8 +880,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	if (cpu_is_offline(cpu))
 		return 0;
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("adding CPU %u\n", cpu);
+	pr_debug("adding CPU %u\n", cpu);
 
 #ifdef CONFIG_SMP
 	/* check whether a different CPU already registered this
@@ -986,7 +888,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	policy = cpufreq_cpu_get(cpu);
 	if (unlikely(policy)) {
 		cpufreq_cpu_put(policy);
-		cpufreq_debug_enable_ratelimit();
 		return 0;
 	}
 #endif
@@ -1037,7 +938,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	 */
 	ret = cpufreq_driver->init(policy);
 	if (ret) {
-		dprintk("initialization failed\n");
+		pr_debug("initialization failed\n");
 		goto err_unlock_policy;
 	}
 	policy->user_policy.min = policy->min;
@@ -1063,8 +964,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
 	module_put(cpufreq_driver->owner);
-	dprintk("initialization complete\n");
-	cpufreq_debug_enable_ratelimit();
+	pr_debug("initialization complete\n");
 
 	return 0;
 
@@ -1088,7 +988,6 @@ err_free_policy:
 nomem_out:
 	module_put(cpufreq_driver->owner);
 module_out:
-	cpufreq_debug_enable_ratelimit();
 	return ret;
 }
 
@@ -1112,15 +1011,13 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	unsigned int j;
 #endif
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("unregistering CPU %u\n", cpu);
+	pr_debug("unregistering CPU %u\n", cpu);
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	data = per_cpu(cpufreq_cpu_data, cpu);
 
 	if (!data) {
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-		cpufreq_debug_enable_ratelimit();
 		unlock_policy_rwsem_write(cpu);
 		return -EINVAL;
 	}
@@ -1132,12 +1029,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	 * only need to unlink, put and exit
 	 */
 	if (unlikely(cpu != data->cpu)) {
-		dprintk("removing link\n");
+		pr_debug("removing link\n");
 		cpumask_clear_cpu(cpu, data->cpus);
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 		kobj = &sys_dev->kobj;
 		cpufreq_cpu_put(data);
-		cpufreq_debug_enable_ratelimit();
 		unlock_policy_rwsem_write(cpu);
 		sysfs_remove_link(kobj, "cpufreq");
 		return 0;
@@ -1170,7 +1066,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 		for_each_cpu(j, data->cpus) {
 			if (j == cpu)
 				continue;
-			dprintk("removing link for cpu %u\n", j);
+			pr_debug("removing link for cpu %u\n", j);
 #ifdef CONFIG_HOTPLUG_CPU
 			strncpy(per_cpu(cpufreq_cpu_governor, j),
 				data->governor->name, CPUFREQ_NAME_LEN);
@@ -1199,21 +1095,35 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	 * not referenced anymore by anybody before we proceed with
 	 * unloading.
 	 */
-	dprintk("waiting for dropping of refcount\n");
+	pr_debug("waiting for dropping of refcount\n");
 	wait_for_completion(cmp);
-	dprintk("wait complete\n");
+	pr_debug("wait complete\n");
 
 	lock_policy_rwsem_write(cpu);
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(data);
 	unlock_policy_rwsem_write(cpu);
 
+#ifdef CONFIG_HOTPLUG_CPU
+	/* when the CPU which is the parent of the kobj is hotplugged
+	 * offline, check for siblings, and create cpufreq sysfs interface
+	 * and symlinks
+	 */
+	if (unlikely(cpumask_weight(data->cpus) > 1)) {
+		/* first sibling now owns the new sysfs dir */
+		cpumask_clear_cpu(cpu, data->cpus);
+		cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));
+
+		/* finally remove our own symlink */
+		lock_policy_rwsem_write(cpu);
+		__cpufreq_remove_dev(sys_dev);
+	}
+#endif
+
 	free_cpumask_var(data->related_cpus);
 	free_cpumask_var(data->cpus);
 	kfree(data);
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
 
-	cpufreq_debug_enable_ratelimit();
 	return 0;
 }
 
@@ -1239,7 +1149,7 @@ static void handle_update(struct work_struct *work)
 	struct cpufreq_policy *policy =
 		container_of(work, struct cpufreq_policy, update);
 	unsigned int cpu = policy->cpu;
-	dprintk("handle_update for cpu %u called\n", cpu);
+	pr_debug("handle_update for cpu %u called\n", cpu);
 	cpufreq_update_policy(cpu);
 }
 
@@ -1257,7 +1167,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
 {
 	struct cpufreq_freqs freqs;
 
-	dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
+	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
 
 	freqs.cpu = cpu;
@@ -1360,7 +1270,7 @@ static int cpufreq_bp_suspend(void)
 	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
-	dprintk("suspending cpu %u\n", cpu);
+	pr_debug("suspending cpu %u\n", cpu);
 
 	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
@@ -1398,7 +1308,7 @@ static void cpufreq_bp_resume(void)
 	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
-	dprintk("resuming cpu %u\n", cpu);
+	pr_debug("resuming cpu %u\n", cpu);
 
 	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
@@ -1510,7 +1420,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 {
 	int retval = -EINVAL;
 
-	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
+	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
 		target_freq, relation);
 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
@@ -1596,7 +1506,7 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 	if (!try_module_get(policy->governor->owner))
 		return -EINVAL;
 
-	dprintk("__cpufreq_governor for CPU %u, event %u\n",
+	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
 						policy->cpu, event);
 	ret = policy->governor->governor(policy, event);
 
@@ -1697,8 +1607,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 {
 	int ret = 0;
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
+	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
 		policy->min, policy->max);
 
 	memcpy(&policy->cpuinfo, &data->cpuinfo,
@@ -1735,19 +1644,19 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 	data->min = policy->min;
 	data->max = policy->max;
 
-	dprintk("new min and max freqs are %u - %u kHz\n",
+	pr_debug("new min and max freqs are %u - %u kHz\n",
 					data->min, data->max);
 
 	if (cpufreq_driver->setpolicy) {
 		data->policy = policy->policy;
-		dprintk("setting range\n");
+		pr_debug("setting range\n");
 		ret = cpufreq_driver->setpolicy(policy);
 	} else {
 		if (policy->governor != data->governor) {
 			/* save old, working values */
 			struct cpufreq_governor *old_gov = data->governor;
 
-			dprintk("governor switch\n");
+			pr_debug("governor switch\n");
 
 			/* end old governor */
 			if (data->governor)
@@ -1757,7 +1666,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 			data->governor = policy->governor;
 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
 				/* new governor failed, so re-start old one */
-				dprintk("starting governor %s failed\n",
+				pr_debug("starting governor %s failed\n",
 							data->governor->name);
 				if (old_gov) {
 					data->governor = old_gov;
@@ -1769,12 +1678,11 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 			}
 			/* might be a policy change, too, so fall through */
 		}
-		dprintk("governor: change or update limits\n");
+		pr_debug("governor: change or update limits\n");
 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
 	}
 
 error_out:
-	cpufreq_debug_enable_ratelimit();
 	return ret;
 }
 
@@ -1801,7 +1709,7 @@ int cpufreq_update_policy(unsigned int cpu)
 		goto fail;
 	}
 
-	dprintk("updating policy for CPU %u\n", cpu);
+	pr_debug("updating policy for CPU %u\n", cpu);
 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
 	policy.min = data->user_policy.min;
 	policy.max = data->user_policy.max;
@@ -1813,7 +1721,7 @@ int cpufreq_update_policy(unsigned int cpu)
 	if (cpufreq_driver->get) {
 		policy.cur = cpufreq_driver->get(cpu);
 		if (!data->cur) {
-			dprintk("Driver did not initialize current freq");
+			pr_debug("Driver did not initialize current freq");
 			data->cur = policy.cur;
 		} else {
 			if (data->cur != policy.cur)
@@ -1889,7 +1797,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	    ((!driver_data->setpolicy) && (!driver_data->target)))
 		return -EINVAL;
 
-	dprintk("trying to register driver %s\n", driver_data->name);
+	pr_debug("trying to register driver %s\n", driver_data->name);
 
 	if (driver_data->setpolicy)
 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
@@ -1920,15 +1828,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 
 		/* if all ->init() calls failed, unregister */
 		if (ret) {
-			dprintk("no CPU initialized for driver %s\n",
+			pr_debug("no CPU initialized for driver %s\n",
 							driver_data->name);
 			goto err_sysdev_unreg;
 		}
 	}
 
 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
-	dprintk("driver %s up and running\n", driver_data->name);
-	cpufreq_debug_enable_ratelimit();
+	pr_debug("driver %s up and running\n", driver_data->name);
 
 	return 0;
 err_sysdev_unreg:
@@ -1955,14 +1862,10 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 {
 	unsigned long flags;
 
-	cpufreq_debug_disable_ratelimit();
-
-	if (!cpufreq_driver || (driver != cpufreq_driver)) {
-		cpufreq_debug_enable_ratelimit();
+	if (!cpufreq_driver || (driver != cpufreq_driver))
 		return -EINVAL;
-	}
 
-	dprintk("unregistering driver %s\n", driver->name);
+	pr_debug("unregistering driver %s\n", driver->name);
 
 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index 7e2e515087f8..f13a8a9af6a1 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -15,9 +15,6 @@
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg)
-
 
 static int cpufreq_governor_performance(struct cpufreq_policy *policy,
 					unsigned int event)
@@ -25,7 +22,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy,
 	switch (event) {
 	case CPUFREQ_GOV_START:
 	case CPUFREQ_GOV_LIMITS:
-		dprintk("setting to %u kHz because of event %u\n",
+		pr_debug("setting to %u kHz because of event %u\n",
 						policy->max, event);
 		__cpufreq_driver_target(policy, policy->max,
 						CPUFREQ_RELATION_H);
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index e6db5faf3eb1..4c2eb512f2bc 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -15,16 +15,13 @@
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg)
-
 static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
 					unsigned int event)
 {
 	switch (event) {
 	case CPUFREQ_GOV_START:
 	case CPUFREQ_GOV_LIMITS:
-		dprintk("setting to %u kHz because of event %u\n",
+		pr_debug("setting to %u kHz because of event %u\n",
 							policy->min, event);
 		__cpufreq_driver_target(policy, policy->min,
 						CPUFREQ_RELATION_L);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 00d73fc8e4e2..b60a4c263686 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -165,17 +165,27 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
 	return -1;
 }
 
+/* should be called late in the CPU removal sequence so that the stats
+ * memory is still available in case someone tries to use it.
+ */
 static void cpufreq_stats_free_table(unsigned int cpu)
 {
 	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu);
-	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-	if (policy && policy->cpu == cpu)
-		sysfs_remove_group(&policy->kobj, &stats_attr_group);
 	if (stat) {
 		kfree(stat->time_in_state);
 		kfree(stat);
 	}
 	per_cpu(cpufreq_stats_table, cpu) = NULL;
+}
+
+/* must be called early in the CPU removal sequence (before
+ * cpufreq_remove_dev) so that policy is still valid.
+ */
+static void cpufreq_stats_free_sysfs(unsigned int cpu)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+	if (policy && policy->cpu == cpu)
+		sysfs_remove_group(&policy->kobj, &stats_attr_group);
 	if (policy)
 		cpufreq_cpu_put(policy);
 }
@@ -316,6 +326,9 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
 	case CPU_ONLINE_FROZEN:
 		cpufreq_update_policy(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+		cpufreq_stats_free_sysfs(cpu);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		cpufreq_stats_free_table(cpu);
@@ -324,9 +337,10 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cpufreq_stat_cpu_notifier __refdata =
-{
+/* priority=1 so this will get called before cpufreq_remove_dev */
+static struct notifier_block cpufreq_stat_cpu_notifier __refdata = {
 	.notifier_call = cpufreq_stat_cpu_callback,
+	.priority = 1,
 };
 
 static struct notifier_block notifier_policy_block = {
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 66d2d1d6c80f..f231015904c0 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -37,9 +37,6 @@ static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
 static DEFINE_MUTEX(userspace_mutex);
 static int cpus_using_userspace_governor;
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg)
-
 /* keep track of frequency transitions */
 static int
 userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -50,7 +47,7 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	if (!per_cpu(cpu_is_managed, freq->cpu))
 		return 0;
 
-	dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n",
+	pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
 			freq->cpu, freq->new);
 	per_cpu(cpu_cur_freq, freq->cpu) = freq->new;
 
@@ -73,7 +70,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 {
 	int ret = -EINVAL;
 
-	dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
+	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
 
 	mutex_lock(&userspace_mutex);
 	if (!per_cpu(cpu_is_managed, policy->cpu))
@@ -134,7 +131,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 		per_cpu(cpu_max_freq, cpu) = policy->max;
 		per_cpu(cpu_cur_freq, cpu) = policy->cur;
 		per_cpu(cpu_set_freq, cpu) = policy->cur;
-		dprintk("managing cpu %u started "
+		pr_debug("managing cpu %u started "
 			"(%u - %u kHz, currently %u kHz)\n",
 				cpu,
 				per_cpu(cpu_min_freq, cpu),
@@ -156,12 +153,12 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 		per_cpu(cpu_min_freq, cpu) = 0;
 		per_cpu(cpu_max_freq, cpu) = 0;
 		per_cpu(cpu_set_freq, cpu) = 0;
-		dprintk("managing cpu %u stopped\n", cpu);
+		pr_debug("managing cpu %u stopped\n", cpu);
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_LIMITS:
 		mutex_lock(&userspace_mutex);
-		dprintk("limit event for cpu %u: %u - %u kHz, "
+		pr_debug("limit event for cpu %u: %u - %u kHz, "
 			"currently %u kHz, last set to %u kHz\n",
 			cpu, policy->min, policy->max,
 			per_cpu(cpu_cur_freq, cpu),
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 35a257dd4bb7..35a257dd4bb7 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index c587db472a75..c587db472a75 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 05432216e224..90431cb92804 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -14,9 +14,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg)
-
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
  *********************************************************************/
@@ -31,11 +28,11 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 	for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
 		unsigned int freq = table[i].frequency;
 		if (freq == CPUFREQ_ENTRY_INVALID) {
-			dprintk("table entry %u is invalid, skipping\n", i);
+			pr_debug("table entry %u is invalid, skipping\n", i);
 
 			continue;
 		}
-		dprintk("table entry %u: %u kHz, %u index\n",
+		pr_debug("table entry %u: %u kHz, %u index\n",
 					i, freq, table[i].index);
 		if (freq < min_freq)
 			min_freq = freq;
@@ -61,7 +58,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 	unsigned int i;
 	unsigned int count = 0;
 
-	dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n",
+	pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n",
 					policy->min, policy->max, policy->cpu);
 
 	if (!cpu_online(policy->cpu))
@@ -86,7 +83,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
 				     policy->cpuinfo.max_freq);
 
-	dprintk("verification lead to (%u - %u kHz) for cpu %u\n",
+	pr_debug("verification lead to (%u - %u kHz) for cpu %u\n",
 				policy->min, policy->max, policy->cpu);
 
 	return 0;
@@ -110,7 +107,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 	};
 	unsigned int i;
 
-	dprintk("request for target %u kHz (relation: %u) for cpu %u\n",
+	pr_debug("request for target %u kHz (relation: %u) for cpu %u\n",
 					target_freq, relation, policy->cpu);
 
 	switch (relation) {
@@ -167,7 +164,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 	} else
 		*index = optimal.index;
 
-	dprintk("target is %u (%u kHz, %u)\n", *index, table[*index].frequency,
+	pr_debug("target is %u (%u kHz, %u)\n", *index, table[*index].frequency,
 		table[*index].index);
 
 	return 0;
@@ -216,14 +213,14 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs);
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu)
 {
-	dprintk("setting show_table for cpu %u to %p\n", cpu, table);
+	pr_debug("setting show_table for cpu %u to %p\n", cpu, table);
 	per_cpu(cpufreq_show_table, cpu) = table;
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu)
 {
-	dprintk("clearing show_table for cpu %u\n", cpu);
+	pr_debug("clearing show_table for cpu %u\n", cpu);
 	per_cpu(cpufreq_show_table, cpu) = NULL;
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
index 32974cf84232..ffe1f2c92ed3 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -142,9 +142,6 @@ module_param(max_duration, int, 0444);
 #define POLICY_MIN_DIV 20
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"gx-suspmod", msg)
-
 /**
  * we can detect a core multipiler from dir0_lsb
  * from GX1 datasheet p.56,
@@ -191,7 +188,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 	/* check if CPU is a MediaGX or a Geode. */
 	if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
 	    (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
-		dprintk("error: no MediaGX/Geode processor found!\n");
+		pr_debug("error: no MediaGX/Geode processor found!\n");
 		return NULL;
 	}
 
@@ -201,7 +198,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 			return gx_pci;
 	}
 
-	dprintk("error: no supported chipset found!\n");
+	pr_debug("error: no supported chipset found!\n");
 	return NULL;
 }
 
@@ -305,14 +302,14 @@ static void gx_set_cpuspeed(unsigned int khz)
 			break;
 		default:
 			local_irq_restore(flags);
-			dprintk("fatal: try to set unknown chipset.\n");
+			pr_debug("fatal: try to set unknown chipset.\n");
 			return;
 		}
 	} else {
 		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
 		gx_params->off_duration = 0;
 		gx_params->on_duration = 0;
-		dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
+		pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n");
 	}
 
 	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
@@ -327,9 +324,9 @@ static void gx_set_cpuspeed(unsigned int khz)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
-	dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+	pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
 		gx_params->on_duration * 32, gx_params->off_duration * 32);
-	dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
+	pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
 }
 
 /****************************************************************
@@ -428,8 +425,8 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
 	stock_freq = maxfreq;
 	curfreq = gx_get_cpuspeed(0);
 
-	dprintk("cpu max frequency is %d.\n", maxfreq);
-	dprintk("cpu current frequency is %dkHz.\n", curfreq);
+	pr_debug("cpu max frequency is %d.\n", maxfreq);
+	pr_debug("cpu current frequency is %dkHz.\n", curfreq);
 
 	/* setup basic struct for cpufreq API */
 	policy->cpu = 0;
@@ -475,7 +472,7 @@ static int __init cpufreq_gx_init(void)
 	if (max_duration > 0xff)
 		max_duration = 0xff;
 
-	dprintk("geode suspend modulation available.\n");
+	pr_debug("geode suspend modulation available.\n");
 
 	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
 	if (params == NULL)
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index cf48cdd6907d..f47d26e2a135 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -77,9 +77,6 @@ static int scale_voltage;
 static int disable_acpi_c3;
 static int revid_errata;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longhaul", msg)
-
 
 /* Clock ratios multiplied by 10 */
 static int mults[32];
@@ -87,7 +84,6 @@ static int eblcr[32];
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
 
-#ifdef CONFIG_CPU_FREQ_DEBUG
 static char speedbuffer[8];
 
 static char *print_speed(int speed)
@@ -106,7 +102,6 @@ static char *print_speed(int speed)
 
 	return speedbuffer;
 }
-#endif
 
 
 static unsigned int calc_speed(int mult)
@@ -275,7 +270,7 @@ static void longhaul_setstate(unsigned int table_index)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-	dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+	pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
 			fsb, mult/10, mult%10, print_speed(speed/1000));
 retry_loop:
 	preempt_disable();
@@ -460,12 +455,12 @@ static int __cpuinit longhaul_get_ranges(void)
 		break;
 	}
 
-	dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
+	pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n",
 		 minmult/10, minmult%10, maxmult/10, maxmult%10);
 
 	highest_speed = calc_speed(maxmult);
 	lowest_speed = calc_speed(minmult);
-	dprintk("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+	pr_debug("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
 		 print_speed(lowest_speed/1000),
 		 print_speed(highest_speed/1000));
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/drivers/cpufreq/longhaul.h
index cbf48fbca881..cbf48fbca881 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/drivers/cpufreq/longhaul.h
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/drivers/cpufreq/longrun.c
index d9f51367666b..34ea359b370e 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/drivers/cpufreq/longrun.c
@@ -15,9 +15,6 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longrun", msg)
-
 static struct cpufreq_driver	longrun_driver;
 
 /**
@@ -40,14 +37,14 @@ static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
 	u32 msr_lo, msr_hi;
 
 	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
+	pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi);
 	if (msr_lo & 0x01)
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 	else
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 
 	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+	pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
 	msr_lo &= 0x0000007F;
 	msr_hi &= 0x0000007F;
 
@@ -150,7 +147,7 @@ static unsigned int longrun_get(unsigned int cpu)
 		return 0;
 
 	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	dprintk("cpuid eax is %u\n", eax);
+	pr_debug("cpuid eax is %u\n", eax);
 
 	return eax * 1000;
 }
@@ -196,7 +193,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
 		*high_freq = msr_lo * 1000; /* to kHz */
 
-		dprintk("longrun table interface told %u - %u kHz\n",
+		pr_debug("longrun table interface told %u - %u kHz\n",
 				*low_freq, *high_freq);
 
 		if (*low_freq > *high_freq)
@@ -207,7 +204,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 	/* set the upper border to the value determined during TSC init */
 	*high_freq = (cpu_khz / 1000);
 	*high_freq = *high_freq * 1000;
-	dprintk("high frequency is %u kHz\n", *high_freq);
+	pr_debug("high frequency is %u kHz\n", *high_freq);
 
 	/* get current borders */
 	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
@@ -233,7 +230,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 		/* restore values */
 		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
 	}
-	dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+	pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax);
 
 	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
 	 * eqals
@@ -249,7 +246,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 	edx = ((eax - ebx) * 100) / (100 - ecx);
 	*low_freq = edx * 1000; /* back to kHz */
 
-	dprintk("low frequency is %u kHz\n", *low_freq);
+	pr_debug("low frequency is %u kHz\n", *low_freq);
 
 	if (*low_freq > *high_freq)
 		*low_freq = *high_freq;
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
index 911e193018ae..911e193018ae 100644
--- a/arch/x86/kernel/cpu/cpufreq/mperf.c
+++ b/drivers/cpufreq/mperf.c
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
index 5dbf2950dc22..5dbf2950dc22 100644
--- a/arch/x86/kernel/cpu/cpufreq/mperf.h
+++ b/drivers/cpufreq/mperf.h
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 52c93648e492..6be3e0760c26 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -35,8 +35,6 @@
 #include "speedstep-lib.h"
 
 #define PFX	"p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"p4-clockmod", msg)
 
 /*
  * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -66,7 +64,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
 
 	if (l & 0x01)
-		dprintk("CPU#%d currently thermal throttled\n", cpu);
+		pr_debug("CPU#%d currently thermal throttled\n", cpu);
 
 	if (has_N44_O17_errata[cpu] &&
 	    (newstate == DC_25PT || newstate == DC_DFLT))
@@ -74,10 +72,10 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 	if (newstate == DC_DISABLE) {
-		dprintk("CPU#%d disabling modulation\n", cpu);
+		pr_debug("CPU#%d disabling modulation\n", cpu);
 		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
 	} else {
-		dprintk("CPU#%d setting duty cycle to %d%%\n",
+		pr_debug("CPU#%d setting duty cycle to %d%%\n",
 			cpu, ((125 * newstate) / 10));
 		/* bits 63 - 5	: reserved
 		 * bit  4	: enable/disable
@@ -217,7 +215,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	case 0x0f11:
 	case 0x0f12:
 		has_N44_O17_errata[policy->cpu] = 1;
-		dprintk("has errata -- disabling low frequencies\n");
+		pr_debug("has errata -- disabling low frequencies\n");
 	}
 
 	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 755a31e0f5b0..7b0603eb0129 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -39,7 +39,7 @@
 
 #include <acpi/processor.h>
 
-#define PCC_VERSION 	"1.00.00"
+#define PCC_VERSION	"1.10.00"
 #define POLL_LOOPS 	300
 
 #define CMD_COMPLETE 	0x1
@@ -48,9 +48,6 @@
 
 #define BUF_SZ		4
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,	\
-					     "pcc-cpufreq", msg)
-
 struct pcc_register_resource {
 	u8 descriptor;
 	u16 length;
@@ -102,7 +99,7 @@ static struct acpi_generic_address doorbell;
 static u64 doorbell_preserve;
 static u64 doorbell_write;
 
-static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
+static u8 OSC_UUID[16] = {0x9F, 0x2C, 0x9B, 0x63, 0x91, 0x70, 0x1f, 0x49,
 			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
 
 struct pcc_cpu {
@@ -152,7 +149,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 
 	spin_lock(&pcc_lock);
 
-	dprintk("get: get_freq for CPU %d\n", cpu);
+	pr_debug("get: get_freq for CPU %d\n", cpu);
 	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
 
 	input_buffer = 0x1;
@@ -170,7 +167,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 
 	status = ioread16(&pcch_hdr->status);
 	if (status != CMD_COMPLETE) {
-		dprintk("get: FAILED: for CPU %d, status is %d\n",
+		pr_debug("get: FAILED: for CPU %d, status is %d\n",
 			cpu, status);
 		goto cmd_incomplete;
 	}
@@ -178,14 +175,14 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
 			/ 100) * 1000);
 
-	dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
-		"0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
+	pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is "
+		"0x%p, contains a value of: 0x%x. Speed is: %d MHz\n",
 		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
 		output_buffer, curr_freq);
 
 	freq_limit = (output_buffer >> 8) & 0xff;
 	if (freq_limit != 0xff) {
-		dprintk("get: frequency for cpu %d is being temporarily"
+		pr_debug("get: frequency for cpu %d is being temporarily"
 			" capped at %d\n", cpu, curr_freq);
 	}
 
@@ -212,8 +209,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
 	cpu = policy->cpu;
 	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
 
-	dprintk("target: CPU %d should go to target freq: %d "
-		"(virtual) input_offset is 0x%x\n",
+	pr_debug("target: CPU %d should go to target freq: %d "
+		"(virtual) input_offset is 0x%p\n",
 		cpu, target_freq,
 		(pcch_virt_addr + pcc_cpu_data->input_offset));
 
@@ -234,14 +231,14 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
 
 	status = ioread16(&pcch_hdr->status);
 	if (status != CMD_COMPLETE) {
-		dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
+		pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
 			cpu, status);
 		goto cmd_incomplete;
 	}
 	iowrite16(0, &pcch_hdr->status);
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
+	pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu);
 	spin_unlock(&pcc_lock);
 
 	return 0;
@@ -293,7 +290,7 @@ static int pcc_get_offset(int cpu)
 	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
 	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
 
-	dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
+	pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data "
 		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
 		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
 out_free:
@@ -410,7 +407,7 @@ static int __init pcc_cpufreq_probe(void)
 	if (ACPI_SUCCESS(status)) {
 		ret = pcc_cpufreq_do_osc(&osc_handle);
 		if (ret)
-			dprintk("probe: _OSC evaluation did not succeed\n");
+			pr_debug("probe: _OSC evaluation did not succeed\n");
 		/* Firmware's use of _OSC is optional */
 		ret = 0;
 	}
@@ -433,7 +430,7 @@ static int __init pcc_cpufreq_probe(void)
 
 	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
 
-	dprintk("probe: mem_resource descriptor: 0x%x,"
+	pr_debug("probe: mem_resource descriptor: 0x%x,"
 		" length: %d, space_id: %d, resource_usage: %d,"
 		" type_specific: %d, granularity: 0x%llx,"
 		" minimum: 0x%llx, maximum: 0x%llx,"
@@ -453,13 +450,13 @@ static int __init pcc_cpufreq_probe(void)
 	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
 					mem_resource->address_length);
 	if (pcch_virt_addr == NULL) {
-		dprintk("probe: could not map shared mem region\n");
+		pr_debug("probe: could not map shared mem region\n");
 		goto out_free;
 	}
 	pcch_hdr = pcch_virt_addr;
 
-	dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
-	dprintk("probe: PCCH header is at physical address: 0x%llx,"
+	pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
+	pr_debug("probe: PCCH header is at physical address: 0x%llx,"
 		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
 		" supported features: 0x%x, command field: 0x%x,"
 		" status field: 0x%x, nominal latency: %d us\n",
@@ -469,7 +466,7 @@ static int __init pcc_cpufreq_probe(void)
 		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
 		ioread32(&pcch_hdr->latency));
 
-	dprintk("probe: min time between commands: %d us,"
+	pr_debug("probe: min time between commands: %d us,"
 		" max time between commands: %d us,"
 		" nominal CPU frequency: %d MHz,"
 		" minimum CPU frequency: %d MHz,"
@@ -494,7 +491,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.access_width = 64;
 	doorbell.address = reg_resource->address;
 
-	dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
+	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
 		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
 		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
 		doorbell.access_width, reg_resource->address);
@@ -515,7 +512,7 @@ static int __init pcc_cpufreq_probe(void)
 
 	doorbell_write = member->integer.value;
 
-	dprintk("probe: doorbell_preserve: 0x%llx,"
+	pr_debug("probe: doorbell_preserve: 0x%llx,"
 		" doorbell_write: 0x%llx\n",
 		doorbell_preserve, doorbell_write);
 
@@ -550,7 +547,7 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	result = pcc_get_offset(cpu);
 	if (result) {
-		dprintk("init: PCCP evaluation failed\n");
+		pr_debug("init: PCCP evaluation failed\n");
 		goto out;
 	}
 
@@ -561,12 +558,12 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cur = pcc_get_freq(cpu);
 
 	if (!policy->cur) {
-		dprintk("init: Unable to get current CPU frequency\n");
+		pr_debug("init: Unable to get current CPU frequency\n");
 		result = -EINVAL;
 		goto out;
 	}
 
-	dprintk("init: policy->max is %d, policy->min is %d\n",
+	pr_debug("init: policy->max is %d, policy->min is %d\n",
 		policy->max, policy->min);
 out:
 	return result;
@@ -597,7 +594,7 @@ static int __init pcc_cpufreq_init(void)
 
 	ret = pcc_cpufreq_probe();
 	if (ret) {
-		dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
+		pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n");
 		return ret;
 	}
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index b3379d6a5c57..b3379d6a5c57 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index 4a45fd6e41ba..d71d9f372359 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -68,7 +68,6 @@ union powernow_acpi_control_t {
 };
 #endif
 
-#ifdef CONFIG_CPU_FREQ_DEBUG
 /* divide by 1000 to get VCore voltage in V. */
 static const int mobile_vid_table[32] = {
     2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
@@ -76,7 +75,6 @@ static const int mobile_vid_table[32] = {
     1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
     1075, 1050, 1025, 1000, 975, 950, 925, 0,
 };
-#endif
 
 /* divide by 10 to get FID. */
 static const int fid_codes[32] = {
@@ -103,9 +101,6 @@ static unsigned int fsb;
 static unsigned int latency;
 static char have_a0;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"powernow-k7", msg)
-
 static int check_fsb(unsigned int fsbspeed)
 {
 	int delta;
@@ -209,7 +204,7 @@ static int get_ranges(unsigned char *pst)
 		vid = *pst++;
 		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
 
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed/1000, vid,
 			 mobile_vid_table[vid]/1000,
@@ -367,7 +362,7 @@ static int powernow_acpi_init(void)
 		unsigned int speed, speed_mhz;
 
 		pc.val = (unsigned long) state->control;
-		dprintk("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+		pr_debug("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
 			 i,
 			 (u32) state->core_frequency,
 			 (u32) state->power,
@@ -401,7 +396,7 @@ static int powernow_acpi_init(void)
 				invalidate_entry(i);
 		}
 
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed_mhz, vid,
 			 mobile_vid_table[vid]/1000,
@@ -409,7 +404,7 @@ static int powernow_acpi_init(void)
 
 		if (state->core_frequency != speed_mhz) {
 			state->core_frequency = speed_mhz;
-			dprintk("   Corrected ACPI frequency to %d\n",
+			pr_debug("   Corrected ACPI frequency to %d\n",
 				speed_mhz);
 		}
 
@@ -453,8 +448,8 @@ static int powernow_acpi_init(void)
 
 static void print_pst_entry(struct pst_s *pst, unsigned int j)
 {
-	dprintk("PST:%d (@%p)\n", j, pst);
-	dprintk(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
+	pr_debug("PST:%d (@%p)\n", j, pst);
+	pr_debug(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
 		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
 }
 
@@ -474,20 +469,20 @@ static int powernow_decode_bios(int maxfid, int startvid)
 		p = phys_to_virt(i);
 
 		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
-			dprintk("Found PSB header at %p\n", p);
+			pr_debug("Found PSB header at %p\n", p);
 			psb = (struct psb_s *) p;
-			dprintk("Table version: 0x%x\n", psb->tableversion);
+			pr_debug("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
 				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
 						" supported right now\n");
 				return -ENODEV;
 			}
 
-			dprintk("Flags: 0x%x\n", psb->flags);
+			pr_debug("Flags: 0x%x\n", psb->flags);
 			if ((psb->flags & 1) == 0)
-				dprintk("Mobile voltage regulator\n");
+				pr_debug("Mobile voltage regulator\n");
 			else
-				dprintk("Desktop voltage regulator\n");
+				pr_debug("Desktop voltage regulator\n");
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
@@ -497,9 +492,9 @@ static int powernow_decode_bios(int maxfid, int startvid)
 						"Correcting.\n", latency);
 				latency = 100;
 			}
-			dprintk("Settling Time: %d microseconds.\n",
+			pr_debug("Settling Time: %d microseconds.\n",
 					psb->settlingtime);
-			dprintk("Has %d PST tables. (Only dumping ones "
+			pr_debug("Has %d PST tables. (Only dumping ones "
 					"relevant to this CPU).\n",
 					psb->numpst);
 
@@ -650,7 +645,7 @@ static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
 		printk(KERN_WARNING PFX "can not determine bus frequency\n");
 		return -EINVAL;
 	}
-	dprintk("FSB: %3dMHz\n", fsb/1000);
+	pr_debug("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
 		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/drivers/cpufreq/powernow-k7.h
index 35fb4eaf6e1c..35fb4eaf6e1c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ b/drivers/cpufreq/powernow-k7.h
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 2368e38327b3..83479b6fb9a1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -139,7 +139,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	}
 	do {
 		if (i++ > 10000) {
-			dprintk("detected change pending stuck\n");
+			pr_debug("detected change pending stuck\n");
 			return 1;
 		}
 		rdmsr(MSR_FIDVID_STATUS, lo, hi);
@@ -176,7 +176,7 @@ static void fidvid_msr_init(void)
 	fid = lo & MSR_S_LO_CURRENT_FID;
 	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
 	hi = MSR_C_HI_STP_GNT_BENIGN;
-	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+	pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
 	wrmsr(MSR_FIDVID_CTL, lo, hi);
 }
 
@@ -196,7 +196,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
 	lo |= MSR_C_LO_INIT_FID_VID;
 
-	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+	pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
 
 	do {
@@ -244,7 +244,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 	lo |= (vid << MSR_C_LO_VID_SHIFT);
 	lo |= MSR_C_LO_INIT_FID_VID;
 
-	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+	pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
 
 	do {
@@ -325,7 +325,7 @@ static int transition_fid_vid(struct powernow_k8_data *data,
 		return 1;
 	}
 
-	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+	pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
 		smp_processor_id(), data->currfid, data->currvid);
 
 	return 0;
@@ -339,7 +339,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	u32 savefid = data->currfid;
 	u32 maxvid, lo, rvomult = 1;
 
-	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+	pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
 		"reqvid 0x%x, rvo 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqvid, data->rvo);
@@ -349,12 +349,12 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	rvosteps *= rvomult;
 	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
 	maxvid = 0x1f & (maxvid >> 16);
-	dprintk("ph1 maxvid=0x%x\n", maxvid);
+	pr_debug("ph1 maxvid=0x%x\n", maxvid);
 	if (reqvid < maxvid) /* lower numbers are higher voltages */
 		reqvid = maxvid;
 
 	while (data->currvid > reqvid) {
-		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
+		pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
 			data->currvid, reqvid);
 		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
 			return 1;
@@ -365,7 +365,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 		if (data->currvid == maxvid) {
 			rvosteps = 0;
 		} else {
-			dprintk("ph1: changing vid for rvo, req 0x%x\n",
+			pr_debug("ph1: changing vid for rvo, req 0x%x\n",
 				data->currvid - 1);
 			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
 				return 1;
@@ -382,7 +382,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 		return 1;
 	}
 
-	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -400,7 +400,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 		return 0;
 	}
 
-	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+	pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
 		"reqfid 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqfid);
@@ -457,7 +457,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 		return 1;
 	}
 
-	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -470,7 +470,7 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 	u32 savefid = data->currfid;
 	u32 savereqvid = reqvid;
 
-	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid);
 
@@ -498,17 +498,17 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 		return 1;
 
 	if (savereqvid != data->currvid) {
-		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
+		pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
 		return 1;
 	}
 
 	if (savefid != data->currfid) {
-		dprintk("ph3 failed, currfid changed 0x%x\n",
+		pr_debug("ph3 failed, currfid changed 0x%x\n",
 			data->currfid);
 		return 1;
 	}
 
-	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -707,7 +707,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
 		return -EIO;
 	}
 
-	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+	pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
 	data->powernow_table = powernow_table;
 	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
 		print_basics(data);
@@ -717,7 +717,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
 		    (pst[j].vid == data->currvid))
 			return 0;
 
-	dprintk("currfid/vid do not match PST, ignoring\n");
+	pr_debug("currfid/vid do not match PST, ignoring\n");
 	return 0;
 }
 
@@ -739,36 +739,36 @@ static int find_psb_table(struct powernow_k8_data *data)
 		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
 			continue;
 
-		dprintk("found PSB header at 0x%p\n", psb);
+		pr_debug("found PSB header at 0x%p\n", psb);
 
-		dprintk("table vers: 0x%x\n", psb->tableversion);
+		pr_debug("table vers: 0x%x\n", psb->tableversion);
 		if (psb->tableversion != PSB_VERSION_1_4) {
 			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
 			return -ENODEV;
 		}
 
-		dprintk("flags: 0x%x\n", psb->flags1);
+		pr_debug("flags: 0x%x\n", psb->flags1);
 		if (psb->flags1) {
 			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
 			return -ENODEV;
 		}
 
 		data->vstable = psb->vstable;
-		dprintk("voltage stabilization time: %d(*20us)\n",
+		pr_debug("voltage stabilization time: %d(*20us)\n",
 				data->vstable);
 
-		dprintk("flags2: 0x%x\n", psb->flags2);
+		pr_debug("flags2: 0x%x\n", psb->flags2);
 		data->rvo = psb->flags2 & 3;
 		data->irt = ((psb->flags2) >> 2) & 3;
 		mvs = ((psb->flags2) >> 4) & 3;
 		data->vidmvs = 1 << mvs;
 		data->batps = ((psb->flags2) >> 6) & 3;
 
-		dprintk("ramp voltage offset: %d\n", data->rvo);
-		dprintk("isochronous relief time: %d\n", data->irt);
-		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+		pr_debug("ramp voltage offset: %d\n", data->rvo);
+		pr_debug("isochronous relief time: %d\n", data->irt);
+		pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
 
-		dprintk("numpst: 0x%x\n", psb->num_tables);
+		pr_debug("numpst: 0x%x\n", psb->num_tables);
 		cpst = psb->num_tables;
 		if ((psb->cpuid == 0x00000fc0) ||
 		    (psb->cpuid == 0x00000fe0)) {
@@ -783,13 +783,13 @@ static int find_psb_table(struct powernow_k8_data *data)
 		}
 
 		data->plllock = psb->plllocktime;
-		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
-		dprintk("maxfid: 0x%x\n", psb->maxfid);
-		dprintk("maxvid: 0x%x\n", psb->maxvid);
+		pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+		pr_debug("maxfid: 0x%x\n", psb->maxfid);
+		pr_debug("maxvid: 0x%x\n", psb->maxvid);
 		maxvid = psb->maxvid;
 
 		data->numps = psb->numps;
-		dprintk("numpstates: 0x%x\n", data->numps);
+		pr_debug("numpstates: 0x%x\n", data->numps);
 		return fill_powernow_table(data,
 				(struct pst_s *)(psb+1), maxvid);
 	}
@@ -834,13 +834,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	u64 control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
-		dprintk("register performance failed: bad ACPI data\n");
+		pr_debug("register performance failed: bad ACPI data\n");
 		return -EIO;
 	}
 
 	/* verify the data contained in the ACPI structures */
 	if (data->acpi_data.state_count <= 1) {
-		dprintk("No ACPI P-States\n");
+		pr_debug("No ACPI P-States\n");
 		goto err_out;
 	}
 
@@ -849,7 +849,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 
 	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
 	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Invalid control/status registers (%x - %x)\n",
+		pr_debug("Invalid control/status registers (%llx - %llx)\n",
 			control, status);
 		goto err_out;
 	}
@@ -858,7 +858,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
 		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
 	if (!powernow_table) {
-		dprintk("powernow_table memory alloc failure\n");
+		pr_debug("powernow_table memory alloc failure\n");
 		goto err_out;
 	}
 
@@ -928,7 +928,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 		}
 		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
 		if (!(hi & HW_PSTATE_VALID_MASK)) {
-			dprintk("invalid pstate %d, ignoring\n", index);
+			pr_debug("invalid pstate %d, ignoring\n", index);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -968,7 +968,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 			vid = (control >> VID_SHIFT) & VID_MASK;
 		}
 
-		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+		pr_debug("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
 
 		index = fid | (vid<<8);
 		powernow_table[i].index = index;
@@ -978,7 +978,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 
 		/* verify frequency is OK */
 		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
-			dprintk("invalid freq %u kHz, ignoring\n", freq);
+			pr_debug("invalid freq %u kHz, ignoring\n", freq);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -986,7 +986,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		/* verify voltage is OK -
 		 * BIOSs are using "off" to indicate invalid */
 		if (vid == VID_OFF) {
-			dprintk("invalid vid %u, ignoring\n", vid);
+			pr_debug("invalid vid %u, ignoring\n", vid);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -1047,7 +1047,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	int res, i;
 	struct cpufreq_freqs freqs;
 
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
 
 	/* fid/vid correctness check for k8 */
 	/* fid are the lower 8 bits of the index we stored into
@@ -1057,18 +1057,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	fid = data->powernow_table[index].index & 0xFF;
 	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
 
-	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+	pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
 
 	if (query_current_values_with_pending_wait(data))
 		return 1;
 
 	if ((data->currvid == vid) && (data->currfid == fid)) {
-		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
+		pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
 			fid, vid);
 		return 0;
 	}
 
-	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+	pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
 		smp_processor_id(), fid, vid);
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
@@ -1096,7 +1096,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 	int res, i;
 	struct cpufreq_freqs freqs;
 
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
 
 	/* get MSR index for hardware pstate transition */
 	pstate = index & HW_PSTATE_MASK;
@@ -1156,14 +1156,14 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 		goto err_out;
 	}
 
-	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
 		pol->cpu, targfreq, pol->min, pol->max, relation);
 
 	if (query_current_values_with_pending_wait(data))
 		goto err_out;
 
 	if (cpu_family != CPU_HW_PSTATE) {
-		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
 		data->currfid, data->currvid);
 
 		if ((checkvid != data->currvid) ||
@@ -1319,7 +1319,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 				data->currpstate);
 	else
 		pol->cur = find_khz_freq_from_fid(data->currfid);
-	dprintk("policy current frequency %d kHz\n", pol->cur);
+	pr_debug("policy current frequency %d kHz\n", pol->cur);
 
 	/* min/max the cpu is capable of */
 	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
@@ -1337,10 +1337,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		dprintk("cpu_init done, current pstate 0x%x\n",
+		pr_debug("cpu_init done, current pstate 0x%x\n",
 				data->currpstate);
 	else
-		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+		pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
 			data->currfid, data->currvid);
 
 	per_cpu(powernow_data, pol->cpu) = data;
@@ -1586,7 +1586,7 @@ static int __cpuinit powernowk8_init(void)
 /* driver entry point for term */
 static void __exit powernowk8_exit(void)
 {
-	dprintk("exit\n");
+	pr_debug("exit\n");
 
 	if (boot_cpu_has(X86_FEATURE_CPB)) {
 		msrs_free(msrs);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/drivers/cpufreq/powernow-k8.h
index df3529b1c02d..3744d26cdc2b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/drivers/cpufreq/powernow-k8.h
@@ -211,8 +211,6 @@ struct pst_s {
 	u8 vid;
 };
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
-
 static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	u32 reqvid, u32 regfid);
 static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 435a996a613a..1e205e6b1727 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -29,8 +29,6 @@
 
 static __u8 __iomem *cpuctl;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"sc520_freq", msg)
 #define PFX "sc520_freq: "
 
 static struct cpufreq_frequency_table sc520_freq_table[] = {
@@ -66,7 +64,7 @@ static void sc520_freq_set_cpu_state(unsigned int state)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-	dprintk("attempting to set frequency to %i kHz\n",
+	pr_debug("attempting to set frequency to %i kHz\n",
 			sc520_freq_table[state].frequency);
 
 	local_irq_disable();
@@ -161,7 +159,7 @@ static int __init sc520_freq_init(void)
 	/* Test if we have the right hardware */
 	if (c->x86_vendor != X86_VENDOR_AMD ||
 	    c->x86 != 4 || c->x86_model != 9) {
-		dprintk("no Elan SC520 processor found!\n");
+		pr_debug("no Elan SC520 processor found!\n");
 		return -ENODEV;
 	}
 	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 9b1ff37de46a..6ea3455def21 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -29,9 +29,6 @@
 #define PFX		"speedstep-centrino: "
 #define MAINTAINER	"cpufreq@vger.kernel.org"
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
-
 #define INTEL_MSR_RANGE	(0xffff)
 
 struct cpu_id
@@ -244,7 +241,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
 	if (model->cpu_id == NULL) {
 		/* No match at all */
-		dprintk("no support for CPU model \"%s\": "
+		pr_debug("no support for CPU model \"%s\": "
 		       "send /proc/cpuinfo to " MAINTAINER "\n",
 		       cpu->x86_model_id);
 		return -ENOENT;
@@ -252,15 +249,15 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
 	if (model->op_points == NULL) {
 		/* Matched a non-match */
-		dprintk("no table support for CPU model \"%s\"\n",
+		pr_debug("no table support for CPU model \"%s\"\n",
 		       cpu->x86_model_id);
-		dprintk("try using the acpi-cpufreq driver\n");
+		pr_debug("try using the acpi-cpufreq driver\n");
 		return -ENOENT;
 	}
 
 	per_cpu(centrino_model, policy->cpu) = model;
 
-	dprintk("found \"%s\": max frequency: %dkHz\n",
+	pr_debug("found \"%s\": max frequency: %dkHz\n",
 	       model->model_name, model->max_freq);
 
 	return 0;
@@ -369,7 +366,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
 
 	if (!per_cpu(centrino_cpu, policy->cpu)) {
-		dprintk("found unsupported CPU with "
+		pr_debug("found unsupported CPU with "
 		"Enhanced SpeedStep: send /proc/cpuinfo to "
 		MAINTAINER "\n");
 		return -ENODEV;
@@ -385,7 +382,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 
 	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
+		pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l);
 		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
@@ -402,7 +399,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 						/* 10uS transition latency */
 	policy->cur = freq;
 
-	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+	pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur);
 
 	ret = cpufreq_frequency_table_cpuinfo(policy,
 		per_cpu(centrino_model, policy->cpu)->op_points);
@@ -498,7 +495,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			good_cpu = j;
 
 		if (good_cpu >= nr_cpu_ids) {
-			dprintk("couldn't limit to CPUs in this domain\n");
+			pr_debug("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
 				/* We haven't started the transition yet. */
@@ -512,7 +509,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		if (first_cpu) {
 			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
 			if (msr == (oldmsr & 0xffff)) {
-				dprintk("no change needed - msr was and needs "
+				pr_debug("no change needed - msr was and needs "
 					"to be %x\n", oldmsr);
 				retval = 0;
 				goto out;
@@ -521,7 +518,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			freqs.old = extract_clock(oldmsr, cpu, 0);
 			freqs.new = extract_clock(msr, cpu, 0);
 
-			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
+			pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
 			for_each_cpu(k, policy->cpus) {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 561758e95180..a748ce782fee 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -53,10 +53,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
 };
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-ich", msg)
-
-
 /**
  * speedstep_find_register - read the PMBASE address
  *
@@ -80,7 +76,7 @@ static int speedstep_find_register(void)
 		return -ENODEV;
 	}
 
-	dprintk("pmbase is 0x%x\n", pmbase);
+	pr_debug("pmbase is 0x%x\n", pmbase);
 	return 0;
 }
 
@@ -106,13 +102,13 @@ static void speedstep_set_state(unsigned int state)
 	/* read state */
 	value = inb(pmbase + 0x50);
 
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
 
 	/* write new state */
 	value &= 0xFE;
 	value |= state;
 
-	dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+	pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
 
 	/* Disable bus master arbitration */
 	pm2_blk = inb(pmbase + 0x20);
@@ -132,10 +128,10 @@ static void speedstep_set_state(unsigned int state)
 	/* Enable IRQs */
 	local_irq_restore(flags);
 
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
 
 	if (state == (value & 0x1))
-		dprintk("change to %u MHz succeeded\n",
+		pr_debug("change to %u MHz succeeded\n",
 			speedstep_get_frequency(speedstep_processor) / 1000);
 	else
 		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
@@ -165,7 +161,7 @@ static int speedstep_activate(void)
 	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
 	if (!(value & 0x08)) {
 		value |= 0x08;
-		dprintk("activating SpeedStep (TM) registers\n");
+		pr_debug("activating SpeedStep (TM) registers\n");
 		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
 	}
 
@@ -218,7 +214,7 @@ static unsigned int speedstep_detect_chipset(void)
 			return 2; /* 2-M */
 
 		if (hostbridge->revision < 5) {
-			dprintk("hostbridge does not support speedstep\n");
+			pr_debug("hostbridge does not support speedstep\n");
 			speedstep_chipset_dev = NULL;
 			pci_dev_put(hostbridge);
 			return 0;
@@ -246,7 +242,7 @@ static unsigned int speedstep_get(unsigned int cpu)
 	if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
 		BUG();
 
-	dprintk("detected %u kHz as current frequency\n", speed);
+	pr_debug("detected %u kHz as current frequency\n", speed);
 	return speed;
 }
 
@@ -276,7 +272,7 @@ static int speedstep_target(struct cpufreq_policy *policy,
 	freqs.new = speedstep_freqs[newstate].frequency;
 	freqs.cpu = policy->cpu;
 
-	dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+	pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
 
 	/* no transition necessary */
 	if (freqs.old == freqs.new)
@@ -351,7 +347,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	if (!speed)
 		return -EIO;
 
-	dprintk("currently at %s speed setting - %i MHz\n",
+	pr_debug("currently at %s speed setting - %i MHz\n",
 		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
 		? "low" : "high",
 		(speed / 1000));
@@ -405,14 +401,14 @@ static int __init speedstep_init(void)
 	/* detect processor */
 	speedstep_processor = speedstep_detect_processor();
 	if (!speedstep_processor) {
-		dprintk("Intel(R) SpeedStep(TM) capable processor "
+		pr_debug("Intel(R) SpeedStep(TM) capable processor "
 				"not found\n");
 		return -ENODEV;
 	}
 
 	/* detect chipset */
 	if (!speedstep_detect_chipset()) {
-		dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+		pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
 				"(yet) available.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index a94ec6be69fa..8af2d2fd9d51 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -18,9 +18,6 @@
 #include <asm/tsc.h>
 #include "speedstep-lib.h"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-lib", msg)
-
 #define PFX "speedstep-lib: "
 
 #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
@@ -75,7 +72,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 
 	/* read MSR 0x2a - we only need the low 32 bits */
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
 	msr_tmp = msr_lo;
 
 	/* decode the FSB */
@@ -89,7 +86,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 
 	/* decode the multiplier */
 	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
-		dprintk("workaround for early PIIIs\n");
+		pr_debug("workaround for early PIIIs\n");
 		msr_lo &= 0x03c00000;
 	} else
 		msr_lo &= 0x0bc00000;
@@ -100,7 +97,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 		j++;
 	}
 
-	dprintk("speed is %u\n",
+	pr_debug("speed is %u\n",
 		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
 
 	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
@@ -112,7 +109,7 @@ static unsigned int pentiumM_get_frequency(void)
 	u32 msr_lo, msr_tmp;
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
 
 	/* see table B-2 of 24547212.pdf */
 	if (msr_lo & 0x00040000) {
@@ -122,7 +119,7 @@ static unsigned int pentiumM_get_frequency(void)
 	}
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
 			msr_tmp, (msr_tmp * 100 * 1000));
 
 	return msr_tmp * 100 * 1000;
@@ -160,11 +157,11 @@ static unsigned int pentium_core_get_frequency(void)
 	}
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+	pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
 			msr_lo, msr_tmp);
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
 			msr_tmp, (msr_tmp * fsb));
 
 	ret = (msr_tmp * fsb);
@@ -190,7 +187,7 @@ static unsigned int pentium4_get_frequency(void)
 
 	rdmsr(0x2c, msr_lo, msr_hi);
 
-	dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+	pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
 
 	/* decode the FSB: see IA-32 Intel (C) Architecture Software
 	 * Developer's Manual, Volume 3: System Prgramming Guide,
@@ -217,7 +214,7 @@ static unsigned int pentium4_get_frequency(void)
 	/* Multiplier. */
 	mult = msr_lo >> 24;
 
-	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+	pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
 			fsb, mult, (fsb * mult));
 
 	ret = (fsb * mult);
@@ -257,7 +254,7 @@ unsigned int speedstep_detect_processor(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
 
-	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
+	pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model);
 
 	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
 	    ((c->x86 != 6) && (c->x86 != 0xF)))
@@ -272,7 +269,7 @@ unsigned int speedstep_detect_processor(void)
 		ebx = cpuid_ebx(0x00000001);
 		ebx &= 0x000000FF;
 
-		dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+		pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
 
 		switch (c->x86_mask) {
 		case 4:
@@ -327,7 +324,7 @@ unsigned int speedstep_detect_processor(void)
 		/* cpuid_ebx(1) is 0x04 for desktop PIII,
 		 * 0x06 for mobile PIII-M */
 		ebx = cpuid_ebx(0x00000001);
-		dprintk("ebx is %x\n", ebx);
+		pr_debug("ebx is %x\n", ebx);
 
 		ebx &= 0x000000FF;
 
@@ -344,7 +341,7 @@ unsigned int speedstep_detect_processor(void)
 		/* all mobile PIII Coppermines have FSB 100 MHz
 		 * ==> sort out a few desktop PIIIs. */
 		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+		pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
 				msr_lo, msr_hi);
 		msr_lo &= 0x00c0000;
 		if (msr_lo != 0x0080000)
@@ -357,12 +354,12 @@ unsigned int speedstep_detect_processor(void)
 		 * bit 56 or 57 is set
 		 */
 		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+		pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
 				msr_lo, msr_hi);
 		if ((msr_hi & (1<<18)) &&
 		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
 			if (c->x86_mask == 0x01) {
-				dprintk("early PIII version\n");
+				pr_debug("early PIII version\n");
 				return SPEEDSTEP_CPU_PIII_C_EARLY;
 			} else
 				return SPEEDSTEP_CPU_PIII_C;
@@ -393,14 +390,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
 		return -EINVAL;
 
-	dprintk("trying to determine both speeds\n");
+	pr_debug("trying to determine both speeds\n");
 
 	/* get current speed */
 	prev_speed = speedstep_get_frequency(processor);
 	if (!prev_speed)
 		return -EIO;
 
-	dprintk("previous speed is %u\n", prev_speed);
+	pr_debug("previous speed is %u\n", prev_speed);
 
 	local_irq_save(flags);
 
@@ -412,7 +409,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		goto out;
 	}
 
-	dprintk("low speed is %u\n", *low_speed);
+	pr_debug("low speed is %u\n", *low_speed);
 
 	/* start latency measurement */
 	if (transition_latency)
@@ -431,7 +428,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		goto out;
 	}
 
-	dprintk("high speed is %u\n", *high_speed);
+	pr_debug("high speed is %u\n", *high_speed);
 
 	if (*low_speed == *high_speed) {
 		ret = -ENODEV;
@@ -445,7 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 	if (transition_latency) {
 		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
 			tv2.tv_usec - tv1.tv_usec;
-		dprintk("transition latency is %u uSec\n", *transition_latency);
+		pr_debug("transition latency is %u uSec\n", *transition_latency);
 
 		/* convert uSec to nSec and add 20% for safety reasons */
 		*transition_latency *= 1200;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/drivers/cpufreq/speedstep-lib.h
index 70d9cea1219d..70d9cea1219d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/drivers/cpufreq/speedstep-lib.h
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 91bc25b67bc1..c76ead3490bf 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -55,9 +55,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  * of DMA activity going on? */
 #define SMI_TRIES 5
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-smi", msg)
-
 /**
  * speedstep_smi_ownership
  */
@@ -70,7 +67,7 @@ static int speedstep_smi_ownership(void)
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 	magic = virt_to_phys(magic_data);
 
-	dprintk("trying to obtain ownership with command %x at port %x\n",
+	pr_debug("trying to obtain ownership with command %x at port %x\n",
 			command, smi_port);
 
 	__asm__ __volatile__(
@@ -85,7 +82,7 @@ static int speedstep_smi_ownership(void)
 		: "memory"
 	);
 
-	dprintk("result is %x\n", result);
+	pr_debug("result is %x\n", result);
 
 	return result;
 }
@@ -106,13 +103,13 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
 	u32 function = GET_SPEEDSTEP_FREQS;
 
 	if (!(ist_info.event & 0xFFFF)) {
-		dprintk("bug #1422 -- can't read freqs from BIOS\n");
+		pr_debug("bug #1422 -- can't read freqs from BIOS\n");
 		return -ENODEV;
 	}
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine frequencies with command %x at port %x\n",
+	pr_debug("trying to determine frequencies with command %x at port %x\n",
 			command, smi_port);
 
 	__asm__ __volatile__(
@@ -129,7 +126,7 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
 		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
-	dprintk("result %x, low_freq %u, high_freq %u\n",
+	pr_debug("result %x, low_freq %u, high_freq %u\n",
 			result, low_mhz, high_mhz);
 
 	/* abort if results are obviously incorrect... */
@@ -154,7 +151,7 @@ static int speedstep_get_state(void)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine current setting with command %x "
+	pr_debug("trying to determine current setting with command %x "
 		"at port %x\n", command, smi_port);
 
 	__asm__ __volatile__(
@@ -168,7 +165,7 @@ static int speedstep_get_state(void)
 		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
-	dprintk("state is %x, result is %x\n", state, result);
+	pr_debug("state is %x, result is %x\n", state, result);
 
 	return state & 1;
 }
@@ -194,13 +191,13 @@ static void speedstep_set_state(unsigned int state)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to set frequency to state %u "
+	pr_debug("trying to set frequency to state %u "
 		"with command %x at port %x\n",
 		state, command, smi_port);
 
 	do {
 		if (retry) {
-			dprintk("retry %u, previous result %u, waiting...\n",
+			pr_debug("retry %u, previous result %u, waiting...\n",
 					retry, result);
 			mdelay(retry * 50);
 		}
@@ -221,7 +218,7 @@ static void speedstep_set_state(unsigned int state)
 	local_irq_restore(flags);
 
 	if (new_state == state)
-		dprintk("change to %u MHz succeeded after %u tries "
+		pr_debug("change to %u MHz succeeded after %u tries "
 			"with result %u\n",
 			(speedstep_freqs[new_state].frequency / 1000),
 			retry, result);
@@ -292,7 +289,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	result = speedstep_smi_ownership();
 	if (result) {
-		dprintk("fails in acquiring ownership of a SMI interface.\n");
+		pr_debug("fails in acquiring ownership of a SMI interface.\n");
 		return -EINVAL;
 	}
 
@@ -304,7 +301,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	if (result) {
 		/* fall back to speedstep_lib.c dection mechanism:
 		 * try both states out */
-		dprintk("could not detect low and high frequencies "
+		pr_debug("could not detect low and high frequencies "
 				"by SMI call.\n");
 		result = speedstep_get_freqs(speedstep_processor,
 				low, high,
@@ -312,18 +309,18 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 				&speedstep_set_state);
 
 		if (result) {
-			dprintk("could not detect two different speeds"
+			pr_debug("could not detect two different speeds"
 					" -- aborting.\n");
 			return result;
 		} else
-			dprintk("workaround worked.\n");
+			pr_debug("workaround worked.\n");
 	}
 
 	/* get current speed setting */
 	state = speedstep_get_state();
 	speed = speedstep_freqs[state].frequency;
 
-	dprintk("currently at %s speed setting - %i MHz\n",
+	pr_debug("currently at %s speed setting - %i MHz\n",
 		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
 		? "low" : "high",
 		(speed / 1000));
@@ -360,7 +357,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
 	int result = speedstep_smi_ownership();
 
 	if (result)
-		dprintk("fails in re-acquiring ownership of a SMI interface.\n");
+		pr_debug("fails in re-acquiring ownership of a SMI interface.\n");
 
 	return result;
 }
@@ -403,12 +400,12 @@ static int __init speedstep_init(void)
 	}
 
 	if (!speedstep_processor) {
-		dprintk("No supported Intel CPU detected.\n");
+		pr_debug("No supported Intel CPU detected.\n");
 		return -ENODEV;
 	}
 
-	dprintk("signature:0x%.8lx, command:0x%.8lx, "
-		"event:0x%.8lx, perf_level:0x%.8lx.\n",
+	pr_debug("signature:0x%.8x, command:0x%.8x, "
+		"event:0x%.8x, perf_level:0x%.8x.\n",
 		ist_info.signature, ist_info.command,
 		ist_info.event, ist_info.perf_level);
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 6b396759e7f5..8a781540590c 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1448,7 +1448,7 @@ static const struct of_device_id fsldma_of_ids[] = {
 	{}
 };
 
-static struct of_platform_driver fsldma_of_driver = {
+static struct platform_driver fsldma_of_driver = {
 	.driver = {
 		.name = "fsl-elo-dma",
 		.owner = THIS_MODULE,
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 31e71c4fc831..9a8bebcf6b17 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -211,8 +211,6 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
 
 	scrubval = scrubval & 0x001F;
 
-	amd64_debug("pci-read, sdram scrub control value: %d\n", scrubval);
-
 	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
 		if (scrubrates[i].scrubval == scrubval) {
 			retval = scrubrates[i].bandwidth;
@@ -933,25 +931,74 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
 static u64 get_error_address(struct mce *m)
 {
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u64 addr;
 	u8 start_bit = 1;
 	u8 end_bit   = 47;
 
-	if (boot_cpu_data.x86 == 0xf) {
+	if (c->x86 == 0xf) {
 		start_bit = 3;
 		end_bit   = 39;
 	}
 
-	return m->addr & GENMASK(start_bit, end_bit);
+	addr = m->addr & GENMASK(start_bit, end_bit);
+
+	/*
+	 * Erratum 637 workaround
+	 */
+	if (c->x86 == 0x15) {
+		struct amd64_pvt *pvt;
+		u64 cc6_base, tmp_addr;
+		u32 tmp;
+		u8 mce_nid, intlv_en;
+
+		if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
+			return addr;
+
+		mce_nid	= amd_get_nb_id(m->extcpu);
+		pvt	= mcis[mce_nid]->pvt_info;
+
+		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
+		intlv_en = tmp >> 21 & 0x7;
+
+		/* add [47:27] + 3 trailing bits */
+		cc6_base  = (tmp & GENMASK(0, 20)) << 3;
+
+		/* reverse and add DramIntlvEn */
+		cc6_base |= intlv_en ^ 0x7;
+
+		/* pin at [47:24] */
+		cc6_base <<= 24;
+
+		if (!intlv_en)
+			return cc6_base | (addr & GENMASK(0, 23));
+
+		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
+
+							/* faster log2 */
+		tmp_addr  = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1);
+
+		/* OR DramIntlvSel into bits [14:12] */
+		tmp_addr |= (tmp & GENMASK(21, 23)) >> 9;
+
+		/* add remaining [11:0] bits from original MC4_ADDR */
+		tmp_addr |= addr & GENMASK(0, 11);
+
+		return cc6_base | tmp_addr;
+	}
+
+	return addr;
 }
 
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
+	struct cpuinfo_x86 *c = &boot_cpu_data;
 	int off = range << 3;
 
 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
 
-	if (boot_cpu_data.x86 == 0xf)
+	if (c->x86 == 0xf)
 		return;
 
 	if (!dram_rw(pvt, range))
@@ -959,6 +1006,31 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 
 	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
 	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
+
+	/* Factor in CC6 save area by reading dst node's limit reg */
+	if (c->x86 == 0x15) {
+		struct pci_dev *f1 = NULL;
+		u8 nid = dram_dst_node(pvt, range);
+		u32 llim;
+
+		f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1));
+		if (WARN_ON(!f1))
+			return;
+
+		amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
+
+		pvt->ranges[range].lim.lo &= GENMASK(0, 15);
+
+					    /* {[39:27],111b} */
+		pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
+
+		pvt->ranges[range].lim.hi &= GENMASK(0, 7);
+
+					    /* [47:40] */
+		pvt->ranges[range].lim.hi |= llim >> 13;
+
+		pci_dev_put(f1);
+	}
 }
 
 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
@@ -1403,12 +1475,8 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
 		return -EINVAL;
 	}
 
-	if (intlv_en &&
-	    (intlv_sel != ((sys_addr >> 12) & intlv_en))) {
-		amd64_warn("Botched intlv bits, en: 0x%x, sel: 0x%x\n",
-			   intlv_en, intlv_sel);
+	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
 		return -EINVAL;
-	}
 
 	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
 
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 11be36a311eb..9a666cb985b2 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -196,6 +196,9 @@
 
 #define DCT_CFG_SEL			0x10C
 
+#define DRAM_LOCAL_NODE_BASE		0x120
+#define DRAM_LOCAL_NODE_LIM		0x124
+
 #define DRAM_BASE_HI			0x140
 #define DRAM_LIMIT_HI			0x144
 
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 26343fd46596..29ffa350bfbe 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -458,13 +458,13 @@ static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci,
 		return -EINVAL;
 
 	new_bw = mci->set_sdram_scrub_rate(mci, bandwidth);
-	if (new_bw >= 0) {
-		edac_printk(KERN_DEBUG, EDAC_MC, "Scrub rate set to %d\n", new_bw);
-		return count;
+	if (new_bw < 0) {
+		edac_printk(KERN_WARNING, EDAC_MC,
+			    "Error setting scrub rate to: %lu\n", bandwidth);
+		return -EINVAL;
 	}
 
-	edac_printk(KERN_DEBUG, EDAC_MC, "Error setting scrub rate to: %lu\n", bandwidth);
-	return -EINVAL;
+	return count;
 }
 
 /*
@@ -483,7 +483,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data)
 		return bandwidth;
 	}
 
-	edac_printk(KERN_DEBUG, EDAC_MC, "Read scrub rate: %d\n", bandwidth);
 	return sprintf(data, "%d\n", bandwidth);
 }
 
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index c1f0045ceb8e..af8e7b1aa290 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
 	struct ppc4xx_edac_pdata *pdata = NULL;
 	const struct device_node *np = op->dev.of_node;
 
-	if (op->dev.of_match == NULL)
+	if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
 		return -EINVAL;
 
 	/* Initial driver pointers and private data */
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index f903d7b6f34a..23d1468ad253 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2199,7 +2199,6 @@ static int ohci_set_config_rom(struct fw_card *card,
 {
 	struct fw_ohci *ohci;
 	unsigned long flags;
-	int ret = -EBUSY;
 	__be32 *next_config_rom;
 	dma_addr_t uninitialized_var(next_config_rom_bus);
 
@@ -2240,22 +2239,37 @@ static int ohci_set_config_rom(struct fw_card *card,
 
 	spin_lock_irqsave(&ohci->lock, flags);
 
+	/*
+	 * If there is not an already pending config_rom update,
+	 * push our new allocation into the ohci->next_config_rom
+	 * and then mark the local variable as null so that we
+	 * won't deallocate the new buffer.
+	 *
+	 * OTOH, if there is a pending config_rom update, just
+	 * use that buffer with the new config_rom data, and
+	 * let this routine free the unused DMA allocation.
+	 */
+
 	if (ohci->next_config_rom == NULL) {
 		ohci->next_config_rom = next_config_rom;
 		ohci->next_config_rom_bus = next_config_rom_bus;
+		next_config_rom = NULL;
+	}
 
-		copy_config_rom(ohci->next_config_rom, config_rom, length);
+	copy_config_rom(ohci->next_config_rom, config_rom, length);
 
-		ohci->next_header = config_rom[0];
-		ohci->next_config_rom[0] = 0;
+	ohci->next_header = config_rom[0];
+	ohci->next_config_rom[0] = 0;
 
-		reg_write(ohci, OHCI1394_ConfigROMmap,
-			  ohci->next_config_rom_bus);
-		ret = 0;
-	}
+	reg_write(ohci, OHCI1394_ConfigROMmap, ohci->next_config_rom_bus);
 
 	spin_unlock_irqrestore(&ohci->lock, flags);
 
+	/* If we didn't use the DMA allocation, delete it. */
+	if (next_config_rom != NULL)
+		dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
+				  next_config_rom, next_config_rom_bus);
+
 	/*
 	 * Now initiate a bus reset to have the changes take
 	 * effect. We clean up the old config rom memory and DMA
@@ -2263,13 +2277,10 @@ static int ohci_set_config_rom(struct fw_card *card,
 	 * controller could need to access it before the bus reset
 	 * takes effect.
 	 */
-	if (ret == 0)
-		fw_schedule_bus_reset(&ohci->card, true, true);
-	else
-		dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
-				  next_config_rom, next_config_rom_bus);
 
-	return ret;
+	fw_schedule_bus_reset(&ohci->card, true, true);
+
+	return 0;
 }
 
 static void ohci_send_request(struct fw_card *card, struct fw_packet *packet)
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index b3a25a55ba23..efba163595db 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -157,4 +157,6 @@ config SIGMA
 	  If unsure, say N here.  Drivers that need these helpers will select
 	  this option automatically.
 
+source "drivers/firmware/google/Kconfig"
+
 endmenu
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 00bb0b80a79f..47338c979126 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -13,3 +13,5 @@ obj-$(CONFIG_ISCSI_IBFT_FIND)	+= iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)	+= iscsi_ibft.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
 obj-$(CONFIG_SIGMA)		+= sigma.o
+
+obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c
index 96c25d93eed1..f1b7f659d3c9 100644
--- a/drivers/firmware/edd.c
+++ b/drivers/firmware/edd.c
@@ -531,8 +531,8 @@ static int
 edd_has_edd30(struct edd_device *edev)
 {
 	struct edd_info *info;
-	int i, nonzero_path = 0;
-	char c;
+	int i;
+	u8 csum = 0;
 
 	if (!edev)
 		return 0;
@@ -544,16 +544,16 @@ edd_has_edd30(struct edd_device *edev)
 		return 0;
 	}
 
-	for (i = 30; i <= 73; i++) {
-		c = *(((uint8_t *) info) + i + 4);
-		if (c) {
-			nonzero_path++;
-			break;
-		}
-	}
-	if (!nonzero_path) {
+
+	/* We support only T13 spec */
+	if (info->params.device_path_info_length != 44)
+		return 0;
+
+	for (i = 30; i < info->params.device_path_info_length + 30; i++)
+		csum += *(((u8 *)&info->params) + i);
+
+	if (csum)
 		return 0;
-	}
 
 	return 1;
 }
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index ff0c373e3bbf..a2d2f1f0d4f3 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -677,8 +677,8 @@ create_efivars_bin_attributes(struct efivars *efivars)
 
 	return 0;
 out_free:
-	kfree(efivars->new_var);
-	efivars->new_var = NULL;
+	kfree(efivars->del_var);
+	efivars->del_var = NULL;
 	kfree(efivars->new_var);
 	efivars->new_var = NULL;
 	return error;
@@ -803,6 +803,8 @@ efivars_init(void)
 	ops.set_variable = efi.set_variable;
 	ops.get_next_variable = efi.get_next_variable;
 	error = register_efivars(&__efivars, &ops, efi_kobj);
+	if (error)
+		goto err_put;
 
 	/* Don't forget the systab entry */
 	error = sysfs_create_group(efi_kobj, &efi_subsys_attr_group);
@@ -810,18 +812,25 @@ efivars_init(void)
 		printk(KERN_ERR
 		       "efivars: Sysfs attribute export failed with error %d.\n",
 		       error);
-		unregister_efivars(&__efivars);
-		kobject_put(efi_kobj);
+		goto err_unregister;
 	}
 
+	return 0;
+
+err_unregister:
+	unregister_efivars(&__efivars);
+err_put:
+	kobject_put(efi_kobj);
 	return error;
 }
 
 static void __exit
 efivars_exit(void)
 {
-	unregister_efivars(&__efivars);
-	kobject_put(efi_kobj);
+	if (efi_enabled) {
+		unregister_efivars(&__efivars);
+		kobject_put(efi_kobj);
+	}
 }
 
 module_init(efivars_init);
diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
new file mode 100644
index 000000000000..87096b6ca5c9
--- /dev/null
+++ b/drivers/firmware/google/Kconfig
@@ -0,0 +1,31 @@
+config GOOGLE_FIRMWARE
+	bool "Google Firmware Drivers"
+	depends on X86
+	default n
+	help
+	  These firmware drivers are used by Google's servers.  They are
+	  only useful if you are working directly on one of their
+	  proprietary servers.  If in doubt, say "N".
+
+menu "Google Firmware Drivers"
+	depends on GOOGLE_FIRMWARE
+
+config GOOGLE_SMI
+	tristate "SMI interface for Google platforms"
+	depends on ACPI && DMI
+	select EFI_VARS
+	help
+	  Say Y here if you want to enable SMI callbacks for Google
+	  platforms.  This provides an interface for writing to and
+	  clearing the EFI event log and reading and writing NVRAM
+	  variables.
+
+config GOOGLE_MEMCONSOLE
+	tristate "Firmware Memory Console"
+	depends on DMI
+	help
+	  This option enables the kernel to search for a firmware log in
+	  the EBDA on Google servers.  If found, this log is exported to
+	  userland in the file /sys/firmware/log.
+
+endmenu
diff --git a/drivers/firmware/google/Makefile b/drivers/firmware/google/Makefile
new file mode 100644
index 000000000000..54a294e3cb61
--- /dev/null
+++ b/drivers/firmware/google/Makefile
@@ -0,0 +1,3 @@
+
+obj-$(CONFIG_GOOGLE_SMI)		+= gsmi.o
+obj-$(CONFIG_GOOGLE_MEMCONSOLE)		+= memconsole.o
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
new file mode 100644
index 000000000000..fa7f0b3e81dd
--- /dev/null
+++ b/drivers/firmware/google/gsmi.c
@@ -0,0 +1,940 @@
+/*
+ * Copyright 2010 Google Inc. All Rights Reserved.
+ * Author: dlaurie@google.com (Duncan Laurie)
+ *
+ * Re-worked to expose sysfs APIs by mikew@google.com (Mike Waychison)
+ *
+ * EFI SMI interface for Google platforms
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/ioctl.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/dmi.h>
+#include <linux/kdebug.h>
+#include <linux/reboot.h>
+#include <linux/efi.h>
+
+#define GSMI_SHUTDOWN_CLEAN	0	/* Clean Shutdown */
+/* TODO(mikew@google.com): Tie in HARDLOCKUP_DETECTOR with NMIWDT */
+#define GSMI_SHUTDOWN_NMIWDT	1	/* NMI Watchdog */
+#define GSMI_SHUTDOWN_PANIC	2	/* Panic */
+#define GSMI_SHUTDOWN_OOPS	3	/* Oops */
+#define GSMI_SHUTDOWN_DIE	4	/* Die -- No longer meaningful */
+#define GSMI_SHUTDOWN_MCE	5	/* Machine Check */
+#define GSMI_SHUTDOWN_SOFTWDT	6	/* Software Watchdog */
+#define GSMI_SHUTDOWN_MBE	7	/* Uncorrected ECC */
+#define GSMI_SHUTDOWN_TRIPLE	8	/* Triple Fault */
+
+#define DRIVER_VERSION		"1.0"
+#define GSMI_GUID_SIZE		16
+#define GSMI_BUF_SIZE		1024
+#define GSMI_BUF_ALIGN		sizeof(u64)
+#define GSMI_CALLBACK		0xef
+
+/* SMI return codes */
+#define GSMI_SUCCESS		0x00
+#define GSMI_UNSUPPORTED2	0x03
+#define GSMI_LOG_FULL		0x0b
+#define GSMI_VAR_NOT_FOUND	0x0e
+#define GSMI_HANDSHAKE_SPIN	0x7d
+#define GSMI_HANDSHAKE_CF	0x7e
+#define GSMI_HANDSHAKE_NONE	0x7f
+#define GSMI_INVALID_PARAMETER	0x82
+#define GSMI_UNSUPPORTED	0x83
+#define GSMI_BUFFER_TOO_SMALL	0x85
+#define GSMI_NOT_READY		0x86
+#define GSMI_DEVICE_ERROR	0x87
+#define GSMI_NOT_FOUND		0x8e
+
+#define QUIRKY_BOARD_HASH 0x78a30a50
+
+/* Internally used commands passed to the firmware */
+#define GSMI_CMD_GET_NVRAM_VAR		0x01
+#define GSMI_CMD_GET_NEXT_VAR		0x02
+#define GSMI_CMD_SET_NVRAM_VAR		0x03
+#define GSMI_CMD_SET_EVENT_LOG		0x08
+#define GSMI_CMD_CLEAR_EVENT_LOG	0x09
+#define GSMI_CMD_CLEAR_CONFIG		0x20
+#define GSMI_CMD_HANDSHAKE_TYPE		0xC1
+
+/* Magic entry type for kernel events */
+#define GSMI_LOG_ENTRY_TYPE_KERNEL     0xDEAD
+
+/* SMI buffers must be in 32bit physical address space */
+struct gsmi_buf {
+	u8 *start;			/* start of buffer */
+	size_t length;			/* length of buffer */
+	dma_addr_t handle;		/* dma allocation handle */
+	u32 address;			/* physical address of buffer */
+};
+
+struct gsmi_device {
+	struct platform_device *pdev;	/* platform device */
+	struct gsmi_buf *name_buf;	/* variable name buffer */
+	struct gsmi_buf *data_buf;	/* generic data buffer */
+	struct gsmi_buf *param_buf;	/* parameter buffer */
+	spinlock_t lock;		/* serialize access to SMIs */
+	u16 smi_cmd;			/* SMI command port */
+	int handshake_type;		/* firmware handler interlock type */
+	struct dma_pool *dma_pool;	/* DMA buffer pool */
+} gsmi_dev;
+
+/* Packed structures for communicating with the firmware */
+struct gsmi_nvram_var_param {
+	efi_guid_t	guid;
+	u32		name_ptr;
+	u32		attributes;
+	u32		data_len;
+	u32		data_ptr;
+} __packed;
+
+struct gsmi_get_next_var_param {
+	u8	guid[GSMI_GUID_SIZE];
+	u32	name_ptr;
+	u32	name_len;
+} __packed;
+
+struct gsmi_set_eventlog_param {
+	u32	data_ptr;
+	u32	data_len;
+	u32	type;
+} __packed;
+
+/* Event log formats */
+struct gsmi_log_entry_type_1 {
+	u16	type;
+	u32	instance;
+} __packed;
+
+
+/*
+ * Some platforms don't have explicit SMI handshake
+ * and need to wait for SMI to complete.
+ */
+#define GSMI_DEFAULT_SPINCOUNT	0x10000
+static unsigned int spincount = GSMI_DEFAULT_SPINCOUNT;
+module_param(spincount, uint, 0600);
+MODULE_PARM_DESC(spincount,
+	"The number of loop iterations to use when using the spin handshake.");
+
+/* Allocate one GSMI_BUF_SIZE SMI buffer from the DMA pool; NULL on failure. */
+static struct gsmi_buf *gsmi_buf_alloc(void)
+{
+	struct gsmi_buf *smibuf = kzalloc(sizeof(*smibuf), GFP_KERNEL);
+
+	if (!smibuf) {
+		printk(KERN_ERR "gsmi: out of memory\n");
+		return NULL;
+	}
+
+	/* allocate buffer in 32bit address space; generic message, any buffer */
+	smibuf->start = dma_pool_alloc(gsmi_dev.dma_pool, GFP_KERNEL,
+				       &smibuf->handle);
+	if (!smibuf->start) {
+		printk(KERN_ERR "gsmi: failed to allocate DMA buffer\n");
+		kfree(smibuf);
+		return NULL;
+	}
+
+	/* record the size and the physical address handed to the firmware */
+	smibuf->length = GSMI_BUF_SIZE;
+	smibuf->address = (u32)virt_to_phys(smibuf->start);
+
+	return smibuf;
+}
+
+/* Release a buffer from gsmi_buf_alloc(); a NULL smibuf is a no-op. */
+static void gsmi_buf_free(struct gsmi_buf *smibuf)
+{
+	if (!smibuf)
+		return;
+	if (smibuf->start)
+		dma_pool_free(gsmi_dev.dma_pool, smibuf->start, smibuf->handle);
+	kfree(smibuf);
+}
+
+/*
+ * Make a call to gsmi func(sub).  Returns 0 on success, 1 if the variable was
+ * not found, the GSMI_HANDSHAKE_* reply as-is, or -ERRNO on error.
+ */
+static int gsmi_exec(u8 func, u8 sub)
+{
+	u16 cmd = (sub << 8) | func;
+	u16 result = 0;
+	int rc = 0;
+
+	/*
+	 * AH  : Subfunction number
+	 * AL  : Function number
+	 * EBX : Parameter block address
+	 * DX  : SMI command port
+	 *
+	 * Three protocols here. See also the comment in gsmi_init().
+	 */
+	if (gsmi_dev.handshake_type == GSMI_HANDSHAKE_CF) {
+		/*
+		 * If handshake_type == HANDSHAKE_CF then set CF on the
+		 * way in and wait for the handler to clear it; this avoids
+		 * corrupting register state on those chipsets which have
+		 * a delay between writing the SMI trigger register and
+		 * entering SMM.
+		 */
+		asm volatile (
+			"stc\n"
+			"outb %%al, %%dx\n"
+		"1:      jc 1b\n"
+			: "=a" (result)
+			: "0" (cmd),
+			  "d" (gsmi_dev.smi_cmd),
+			  "b" (gsmi_dev.param_buf->address)
+			: "memory", "cc"
+		);
+	} else if (gsmi_dev.handshake_type == GSMI_HANDSHAKE_SPIN) {
+		/*
+		 * If handshake_type == HANDSHAKE_SPIN we spin a
+		 * hundred-ish usecs to ensure the SMI has triggered.
+		 */
+		asm volatile (
+			"outb %%al, %%dx\n"
+		"1:      loop 1b\n"
+			: "=a" (result)
+			: "0" (cmd),
+			  "d" (gsmi_dev.smi_cmd),
+			  "b" (gsmi_dev.param_buf->address),
+			  "c" (spincount)
+			: "memory", "cc"
+		);
+	} else {
+		/*
+		 * If handshake_type == HANDSHAKE_NONE we do nothing;
+		 * either we don't need to or it's legacy firmware that
+		 * doesn't understand the CF protocol.
+		 */
+		asm volatile (
+			"outb %%al, %%dx\n\t"
+			: "=a" (result)
+			: "0" (cmd),
+			  "d" (gsmi_dev.smi_cmd),
+			  "b" (gsmi_dev.param_buf->address)
+			: "memory", "cc"
+		);
+	}
+
+	/* map the status the SMI handler left in AX to a return code */
+	switch (result) {
+	case GSMI_SUCCESS:
+		break;
+	case GSMI_VAR_NOT_FOUND:
+		/* not really an error, but let the caller know */
+		rc = 1;
+		break;
+	case GSMI_INVALID_PARAMETER:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Invalid parameter\n", cmd);
+		rc = -EINVAL;
+		break;
+	case GSMI_BUFFER_TOO_SMALL:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Buffer too small\n", cmd);
+		rc = -ENOMEM;
+		break;
+	case GSMI_UNSUPPORTED:
+	case GSMI_UNSUPPORTED2:
+		if (sub != GSMI_CMD_HANDSHAKE_TYPE)
+			printk(KERN_ERR "gsmi: exec 0x%04x: Not supported\n",
+			       cmd);
+		rc = -ENOSYS;
+		break;
+	case GSMI_NOT_READY:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Not ready\n", cmd);
+		rc = -EBUSY;
+		break;
+	case GSMI_DEVICE_ERROR:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Device error\n", cmd);
+		rc = -EFAULT;
+		break;
+	case GSMI_NOT_FOUND:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Data not found\n", cmd);
+		rc = -ENOENT;
+		break;
+	case GSMI_LOG_FULL:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Log full\n", cmd);
+		rc = -ENOSPC;
+		break;
+	case GSMI_HANDSHAKE_CF:
+	case GSMI_HANDSHAKE_SPIN:
+	case GSMI_HANDSHAKE_NONE:
+		rc = result;
+		break;
+	default:
+		printk(KERN_ERR "gsmi: exec 0x%04x: Unknown error 0x%04x\n",
+		       cmd, result);
+		rc = -ENXIO;
+	}
+
+	return rc;
+}
+
+/* Count UTF-16 code units before the NUL, reading at most maxlength units */
+static size_t utf16_strlen(efi_char16_t *data, unsigned long maxlength)
+{
+	unsigned long length = 0;
+
+	/* test the bound first so data[maxlength] is never dereferenced */
+	while (length < maxlength && data[length] != 0)
+		length++;
+	return length;
+}
+
+static efi_status_t gsmi_get_variable(efi_char16_t *name,
+				      efi_guid_t *vendor, u32 *attr,
+				      unsigned long *data_size,
+				      void *data)
+{
+	struct gsmi_nvram_var_param param = {
+		.name_ptr = gsmi_dev.name_buf->address,
+		.data_ptr = gsmi_dev.data_buf->address,
+		.data_len = (u32)*data_size,
+	};
+	efi_status_t ret = EFI_SUCCESS;
+	unsigned long flags;
+	size_t name_len = utf16_strlen(name, GSMI_BUF_SIZE / 2);
+	int rc;
+
+	if (name_len >= GSMI_BUF_SIZE / 2)
+		return EFI_BAD_BUFFER_SIZE;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* Vendor guid, sent to the firmware inside the parameter block */
+	memcpy(&param.guid, vendor, sizeof(param.guid));
+
+	/* variable name, already in UTF-16; the memset leaves it NUL-padded */
+	memset(gsmi_dev.name_buf->start, 0, gsmi_dev.name_buf->length);
+	memcpy(gsmi_dev.name_buf->start, name, name_len * 2);
+
+	/* clear the data buffer that data_ptr hands to the firmware */
+	memset(gsmi_dev.data_buf->start, 0, gsmi_dev.data_buf->length);
+
+	/* parameter buffer: its address is what gsmi_exec() passes in EBX */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_GET_NVRAM_VAR);
+	if (rc < 0) {
+		printk(KERN_ERR "gsmi: Get Variable failed\n");
+		ret = EFI_LOAD_ERROR;
+	} else if (rc == 1) {
+		/* variable was not found */
+		ret = EFI_NOT_FOUND;
+	} else {
+		/* Get the arguments back; param.data_len is used below */
+		memcpy(&param, gsmi_dev.param_buf->start, sizeof(param));
+
+		/* The size reported is the min of all of our buffers */
+		*data_size = min(*data_size, gsmi_dev.data_buf->length);
+		*data_size = min_t(unsigned long, *data_size, param.data_len);
+
+		/* Copy data back to return buffer. */
+		memcpy(data, gsmi_dev.data_buf->start, *data_size);
+
+		/* All variables have the following attributes */
+		*attr = EFI_VARIABLE_NON_VOLATILE |
+			EFI_VARIABLE_BOOTSERVICE_ACCESS |
+			EFI_VARIABLE_RUNTIME_ACCESS;
+	}
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	return ret;
+}
+
+static efi_status_t gsmi_get_next_variable(unsigned long *name_size,
+					   efi_char16_t *name,
+					   efi_guid_t *vendor)
+{
+	struct gsmi_get_next_var_param param = {
+		.name_ptr = gsmi_dev.name_buf->address,
+		.name_len = gsmi_dev.name_buf->length,
+	};
+	efi_status_t ret = EFI_SUCCESS;
+	int rc;
+	unsigned long flags;
+
+	/* For the moment, only support buffers that exactly match in size */
+	if (*name_size != GSMI_BUF_SIZE)
+		return EFI_BAD_BUFFER_SIZE;
+
+	/* Let's make sure the thing is at least null-terminated */
+	if (utf16_strlen(name, GSMI_BUF_SIZE / 2) == GSMI_BUF_SIZE / 2)
+		return EFI_INVALID_PARAMETER;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* guid passed in; overwritten with the returned one on success */
+	memcpy(&param.guid, vendor, sizeof(param.guid));
+
+	/* variable name, already in UTF-16 (*name_size == GSMI_BUF_SIZE) */
+	memcpy(gsmi_dev.name_buf->start, name, *name_size);
+
+	/* parameter buffer */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_GET_NEXT_VAR);
+	if (rc < 0) {
+		printk(KERN_ERR "gsmi: Get Next Variable Name failed\n");
+		ret = EFI_LOAD_ERROR;
+	} else if (rc == 1) {
+		/* variable not found -- end of list */
+		ret = EFI_NOT_FOUND;
+	} else {
+		/* read the parameter block back; it holds the returned guid */
+		memcpy(&param, gsmi_dev.param_buf->start, sizeof(param));
+
+		/* Copy the name back; report its length in bytes, NUL excluded */
+		memcpy(name, gsmi_dev.name_buf->start, GSMI_BUF_SIZE);
+		*name_size = utf16_strlen(name, GSMI_BUF_SIZE / 2) * 2;
+
+		/* copy guid to return buffer */
+		memcpy(vendor, &param.guid, sizeof(param.guid));
+		ret = EFI_SUCCESS;
+	}
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	return ret;
+}
+
+/* Set an NVRAM variable via SMI; attr is ignored, fixed attributes are used */
+static efi_status_t gsmi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+				      unsigned long attr,
+				      unsigned long data_size, void *data)
+{
+	struct gsmi_nvram_var_param param = {
+		.name_ptr = gsmi_dev.name_buf->address,
+		.data_ptr = gsmi_dev.data_buf->address,
+		.data_len = (u32)data_size,
+		.attributes = EFI_VARIABLE_NON_VOLATILE |
+			      EFI_VARIABLE_BOOTSERVICE_ACCESS |
+			      EFI_VARIABLE_RUNTIME_ACCESS,
+	};
+	size_t name_len = utf16_strlen(name, GSMI_BUF_SIZE / 2);
+	efi_status_t ret = EFI_SUCCESS;
+	int rc;
+	unsigned long flags;
+
+	/* the name (plus NUL) and the data must each fit one SMI buffer */
+	if (name_len >= GSMI_BUF_SIZE / 2 || data_size > GSMI_BUF_SIZE)
+		return EFI_BAD_BUFFER_SIZE;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* guid */
+	memcpy(&param.guid, vendor, sizeof(param.guid));
+
+	/* variable name, already in UTF-16 */
+	memset(gsmi_dev.name_buf->start, 0, gsmi_dev.name_buf->length);
+	memcpy(gsmi_dev.name_buf->start, name, name_len * 2);
+
+	/* data payload; data_size was bounded to the buffer size above */
+	memset(gsmi_dev.data_buf->start, 0, gsmi_dev.data_buf->length);
+	memcpy(gsmi_dev.data_buf->start, data, data_size);
+
+	/* parameter buffer */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_SET_NVRAM_VAR);
+	if (rc < 0) {
+		printk(KERN_ERR "gsmi: Set Variable failed\n");
+		ret = EFI_INVALID_PARAMETER;
+	}
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	return ret;
+}
+
+static const struct efivar_operations efivar_ops = {
+	.get_variable = gsmi_get_variable,
+	.set_variable = gsmi_set_variable,
+	.get_next_variable = gsmi_get_next_variable,
+};
+
+/* append_to_eventlog write: buf is a u32 entry type followed by the payload */
+static ssize_t eventlog_write(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t pos, size_t count)
+{
+	struct gsmi_set_eventlog_param param = {
+		.data_ptr = gsmi_dev.data_buf->address,
+	};
+	unsigned long flags;
+	int rc;
+
+	/* Pull the type out */
+	if (count < sizeof(u32))
+		return -EINVAL;
+	param.type = *(u32 *)buf;
+	buf += sizeof(u32);
+
+	/* The rest is the payload; subtract the type header exactly once */
+	if (count - sizeof(u32) > gsmi_dev.data_buf->length)
+		return -EINVAL;
+	param.data_len = count - sizeof(u32);
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* data pointer */
+	memset(gsmi_dev.data_buf->start, 0, gsmi_dev.data_buf->length);
+	memcpy(gsmi_dev.data_buf->start, buf, param.data_len);
+
+	/* parameter buffer */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_SET_EVENT_LOG);
+	if (rc < 0)
+		printk(KERN_ERR "gsmi: Set Event Log failed\n");
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	/* a sysfs write must report the bytes it consumed, not 0 */
+	return rc < 0 ? rc : (ssize_t)count;
+}
+
+static struct bin_attribute eventlog_bin_attr = {
+	.attr = {.name = "append_to_eventlog", .mode = 0200},
+	.write = eventlog_write,
+};
+
+/* clear_eventlog store: buf holds the percentage of the event log to clear. */
+static ssize_t gsmi_clear_eventlog_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct {
+		u32 percentage;
+		u32 data_type;
+	} param;
+	unsigned long percent;
+	unsigned long flags;
+	int rc;
+
+	rc = strict_strtoul(buf, 0, &percent);
+	if (rc)
+		return rc;
+
+	/*
+	 * A percentage only makes sense from 0 through 100; reject
+	 * everything above that.
+	 */
+	if (percent > 100)
+		return -EINVAL;
+
+	/* a data_type of 0 selects the smbios event log */
+	param.data_type = 0;
+	param.percentage = percent;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* stage the request in the zeroed parameter buffer */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_CLEAR_EVENT_LOG);
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	/* any nonzero gsmi_exec() result is handed back unchanged */
+	return rc ? rc : (ssize_t)count;
+}
+
+static struct kobj_attribute gsmi_clear_eventlog_attr = {
+	.attr = {.name = "clear_eventlog", .mode = 0200},
+	.store = gsmi_clear_eventlog_store,
+};
+
+/* clear_config store: any write issues GSMI_CMD_CLEAR_CONFIG; buf is unused */
+static ssize_t gsmi_clear_config_store(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       const char *buf, size_t count)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	/* the command gets an all-zero parameter block */
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_CLEAR_CONFIG);
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	/* any nonzero gsmi_exec() result is handed back unchanged */
+	return rc ? rc : (ssize_t)count;
+}
+
+static struct kobj_attribute gsmi_clear_config_attr = {
+	.attr = {.name = "clear_config", .mode = 0200},
+	.store = gsmi_clear_config_store,
+};
+
+static const struct attribute *gsmi_attrs[] = {
+	&gsmi_clear_config_attr.attr,
+	&gsmi_clear_eventlog_attr.attr,
+	NULL,
+};
+
+static int gsmi_shutdown_reason(int reason)
+{
+	struct gsmi_log_entry_type_1 entry = {
+		.type     = GSMI_LOG_ENTRY_TYPE_KERNEL,
+		.instance = reason,
+	};
+	struct gsmi_set_eventlog_param param = {
+		.data_len = sizeof(entry),
+		.type     = 1,
+	};
+	static int saved_reason;
+	int rc = 0;
+	unsigned long flags;
+
+	/* log each reason once; NOTE(review): tested outside the lock -- racy? */
+	if (saved_reason & (1 << reason))
+		return 0;
+
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+
+	saved_reason |= (1 << reason);
+
+	/* data buffer carries the type-1 log entry built above */
+	memset(gsmi_dev.data_buf->start, 0, gsmi_dev.data_buf->length);
+	memcpy(gsmi_dev.data_buf->start, &entry, sizeof(entry));
+
+	/* parameter buffer points the firmware at the data buffer */
+	param.data_ptr = gsmi_dev.data_buf->address;
+	memset(gsmi_dev.param_buf->start, 0, gsmi_dev.param_buf->length);
+	memcpy(gsmi_dev.param_buf->start, &param, sizeof(param));
+
+	rc = gsmi_exec(GSMI_CALLBACK, GSMI_CMD_SET_EVENT_LOG);
+
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	if (rc < 0)
+		printk(KERN_ERR "gsmi: Log Shutdown Reason failed\n");
+	else
+		printk(KERN_EMERG "gsmi: Log Shutdown Reason 0x%02x\n",
+		       reason);
+
+	return rc;
+}
+
+static int gsmi_reboot_callback(struct notifier_block *nb,
+				unsigned long reason, void *arg)
+{
+	gsmi_shutdown_reason(GSMI_SHUTDOWN_CLEAN);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block gsmi_reboot_notifier = {
+	.notifier_call = gsmi_reboot_callback
+};
+
+static int gsmi_die_callback(struct notifier_block *nb,
+			     unsigned long reason, void *arg)
+{
+	if (reason == DIE_OOPS)
+		gsmi_shutdown_reason(GSMI_SHUTDOWN_OOPS);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block gsmi_die_notifier = {
+	.notifier_call = gsmi_die_callback
+};
+
+static int gsmi_panic_callback(struct notifier_block *nb,
+			       unsigned long reason, void *arg)
+{
+	gsmi_shutdown_reason(GSMI_SHUTDOWN_PANIC);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block gsmi_panic_notifier = {
+	.notifier_call = gsmi_panic_callback,
+};
+
+/*
+ * This hash function was blatantly copied from include/linux/hash.h.
+ * It is used by this driver to obfuscate a board name that requires a
+ * quirk within this driver.
+ *
+ * Please do not remove this copy of the function as any changes to the
+ * global utility hash_64() function would break this driver's ability
+ * to identify a board and provide the appropriate quirk -- mikew@google.com
+ */
+static u64 __init local_hash_64(u64 val, unsigned bits)
+{
+	u64 hash = val;
+
+	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
+	u64 n = hash;
+	n <<= 18;
+	hash -= n;
+	n <<= 33;
+	hash -= n;
+	n <<= 3;
+	hash += n;
+	n <<= 3;
+	hash -= n;
+	n <<= 4;
+	hash += n;
+	n <<= 2;
+	hash += n;
+
+	/* High bits are more random, so return only the top 'bits' bits. */
+	return hash >> (64 - bits);
+}
+
+static u32 __init hash_oem_table_id(char s[8])
+{
+	u64 raw;	/* the eight table-id bytes, in memory order */
+	memcpy(&raw, s, sizeof(raw));
+	return local_hash_64(raw, 32);
+}
+
+static struct dmi_system_id gsmi_dmi_table[] __initdata = {
+	{
+		.ident = "Google Board",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Google, Inc."),
+		},
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(dmi, gsmi_dmi_table);
+
+/* Return 0 if this machine has usable gsmi firmware, -ENODEV otherwise. */
+static __init int gsmi_system_valid(void)
+{
+	u32 hash;
+
+	if (!dmi_check_system(gsmi_dmi_table))
+		return -ENODEV;
+
+	/*
+	 * Only newer firmware supports the gsmi interface.  Older firmware
+	 * used to put the table name in the first four bytes of the
+	 * oem_table_id field and newer firmware does not, so use that as
+	 * the discriminant.  We have to do this in order to whitewash our
+	 * board names out of the public driver.
+	 */
+	if (!strncmp(acpi_gbl_FADT.header.oem_table_id, "FACP", 4)) {
+		printk(KERN_INFO "gsmi: Board is too old\n");
+		return -ENODEV;
+	}
+
+	/* Disable on board with 1.0 BIOS due to Google bug 2602657 */
+	hash = hash_oem_table_id(acpi_gbl_FADT.header.oem_table_id);
+	if (hash == QUIRKY_BOARD_HASH) {
+		const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+		/* the DMI field may be absent: never hand NULL to strncmp */
+		if (bios_ver && strncmp(bios_ver, "1.0", 3) == 0) {
+			pr_info("gsmi: disabled on this board's BIOS %s\n",
+				bios_ver);
+			return -ENODEV;
+		}
+	}
+
+	/* check for valid SMI command port in ACPI FADT */
+	if (acpi_gbl_FADT.smi_command == 0) {
+		pr_info("gsmi: missing smi_command\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static struct kobject *gsmi_kobj;
+static struct efivars efivars;
+
+/* Module init: probe the firmware, set up SMI buffers, sysfs and notifiers. */
+static __init int gsmi_init(void)
+{
+	unsigned long flags;
+	int ret;
+
+	ret = gsmi_system_valid();
+	if (ret)
+		return ret;
+
+	gsmi_dev.smi_cmd = acpi_gbl_FADT.smi_command;
+
+	/* register device */
+	gsmi_dev.pdev = platform_device_register_simple("gsmi", -1, NULL, 0);
+	if (IS_ERR(gsmi_dev.pdev)) {
+		printk(KERN_ERR "gsmi: unable to register platform device\n");
+		return PTR_ERR(gsmi_dev.pdev);
+	}
+
+	/* SMI access needs to be serialized */
+	spin_lock_init(&gsmi_dev.lock);
+
+	/* SMI callbacks require 32bit addresses */
+	gsmi_dev.pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	gsmi_dev.pdev->dev.dma_mask = &gsmi_dev.pdev->dev.coherent_dma_mask;
+	ret = -ENOMEM;
+	gsmi_dev.dma_pool = dma_pool_create("gsmi", &gsmi_dev.pdev->dev,
+					     GSMI_BUF_SIZE, GSMI_BUF_ALIGN, 0);
+	if (!gsmi_dev.dma_pool)
+		goto out_err;
+
+	/*
+	 * pre-allocate buffers because sometimes we are called when
+	 * this is not feasible: oops, panic, die, mce, etc
+	 */
+	gsmi_dev.name_buf = gsmi_buf_alloc();
+	if (!gsmi_dev.name_buf) {
+		printk(KERN_ERR "gsmi: failed to allocate name buffer\n");
+		goto out_err;
+	}
+
+	gsmi_dev.data_buf = gsmi_buf_alloc();
+	if (!gsmi_dev.data_buf) {
+		printk(KERN_ERR "gsmi: failed to allocate data buffer\n");
+		goto out_err;
+	}
+
+	gsmi_dev.param_buf = gsmi_buf_alloc();
+	if (!gsmi_dev.param_buf) {
+		printk(KERN_ERR "gsmi: failed to allocate param buffer\n");
+		goto out_err;
+	}
+
+	/*
+	 * Determine type of handshake used to serialize the SMI
+	 * entry. See also gsmi_exec().
+	 *
+	 * On some chipsets writing the SMI trigger register in the
+	 * southbridge doesn't result in an immediate SMI: the processor can
+	 * execute "a few" more instructions before the SMI takes effect.
+	 * To ensure synchronous behavior, the driver and the firmware
+	 * handler handshake so that the driver spins until released. The
+	 * query below, itself issued as SPIN, determines the handshake type.
+	 *
+	 * NONE: The firmware handler does not implement any
+	 * handshake. Either it doesn't need to, or it's legacy firmware
+	 * that doesn't know it needs to and never will.
+	 *
+	 * CF: The firmware handler will clear the CF in the saved
+	 * state before returning. The driver may set the CF and test for
+	 * it to clear before proceeding.
+	 *
+	 * SPIN: The firmware handler does not implement any handshake
+	 * but the driver should spin for a hundred or so microseconds
+	 * to ensure the SMI has triggered.
+	 *
+	 * Finally, gsmi_exec() returns -ENOSYS if GSMI_CMD_HANDSHAKE_TYPE
+	 * is unimplemented, which implies HANDSHAKE_NONE.
+	 */
+	spin_lock_irqsave(&gsmi_dev.lock, flags);
+	gsmi_dev.handshake_type = GSMI_HANDSHAKE_SPIN;
+	gsmi_dev.handshake_type =
+	    gsmi_exec(GSMI_CALLBACK, GSMI_CMD_HANDSHAKE_TYPE);
+	if (gsmi_dev.handshake_type == -ENOSYS)
+		gsmi_dev.handshake_type = GSMI_HANDSHAKE_NONE;
+	spin_unlock_irqrestore(&gsmi_dev.lock, flags);
+
+	/* Remove and clean up gsmi if the handshake could not complete. */
+	if (gsmi_dev.handshake_type == -ENXIO) {
+		printk(KERN_INFO "gsmi version " DRIVER_VERSION
+		       " failed to load\n");
+		ret = -ENODEV;
+		goto out_err;
+	}
+
+	printk(KERN_INFO "gsmi version " DRIVER_VERSION " loaded\n");
+
+	/* Register in the firmware directory */
+	ret = -ENOMEM;
+	gsmi_kobj = kobject_create_and_add("gsmi", firmware_kobj);
+	if (!gsmi_kobj) {
+		printk(KERN_INFO "gsmi: Failed to create firmware kobj\n");
+		goto out_err;
+	}
+
+	/* Setup eventlog access */
+	ret = sysfs_create_bin_file(gsmi_kobj, &eventlog_bin_attr);
+	if (ret) {
+		printk(KERN_INFO "gsmi: Failed to setup eventlog\n");
+		goto out_err;
+	}
+
+	/* Other attributes */
+	ret = sysfs_create_files(gsmi_kobj, gsmi_attrs);
+	if (ret) {
+		printk(KERN_INFO "gsmi: Failed to add attrs\n");
+		goto out_err;
+	}
+
+	/* keep the error code: a stale ret of 0 would report success */
+	ret = register_efivars(&efivars, &efivar_ops, gsmi_kobj);
+	if (ret) {
+		printk(KERN_INFO "gsmi: Failed to register efivars\n");
+		goto out_err;
+	}
+
+	register_reboot_notifier(&gsmi_reboot_notifier);
+	register_die_notifier(&gsmi_die_notifier);
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &gsmi_panic_notifier);
+
+	return 0;
+
+ out_err:
+	kobject_put(gsmi_kobj);
+	gsmi_buf_free(gsmi_dev.param_buf);
+	gsmi_buf_free(gsmi_dev.data_buf);
+	gsmi_buf_free(gsmi_dev.name_buf);
+	if (gsmi_dev.dma_pool)
+		dma_pool_destroy(gsmi_dev.dma_pool);
+	platform_device_unregister(gsmi_dev.pdev);
+	pr_info("gsmi: failed to load: %d\n", ret);
+	return ret;
+}
+
+static void __exit gsmi_exit(void)
+{
+	unregister_reboot_notifier(&gsmi_reboot_notifier);
+	unregister_die_notifier(&gsmi_die_notifier);
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &gsmi_panic_notifier);
+	unregister_efivars(&efivars);
+	/* then release what gsmi_init()'s out_err path releases, same order */
+	kobject_put(gsmi_kobj);
+	gsmi_buf_free(gsmi_dev.param_buf);
+	gsmi_buf_free(gsmi_dev.data_buf);
+	gsmi_buf_free(gsmi_dev.name_buf);
+	dma_pool_destroy(gsmi_dev.dma_pool);
+	platform_device_unregister(gsmi_dev.pdev);
+}
+
+module_init(gsmi_init);
+module_exit(gsmi_exit);
+
+MODULE_AUTHOR("Google, Inc.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/memconsole.c b/drivers/firmware/google/memconsole.c
new file mode 100644
index 000000000000..2a90ba613613
--- /dev/null
+++ b/drivers/firmware/google/memconsole.c
@@ -0,0 +1,166 @@
+/*
+ * memconsole.c
+ *
+ * Infrastructure for exporting the BIOS memory based console
+ * to userland through a sysfs file.
+ *
+ * Copyright 2010 Google Inc. All rights reserved.
+ */
+
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_MEMCONSOLE_V1_MAGIC	0xDEADBABE
+#define BIOS_MEMCONSOLE_V2_MAGIC	(('M')|('C'<<8)|('O'<<16)|('N'<<24))
+
+struct biosmemcon_ebda {
+	u32 signature;
+	union {
+		struct {
+			u8  enabled;
+			u32 buffer_addr;
+			u16 start;
+			u16 end;
+			u16 num_chars;
+			u8  wrapped;
+		} __packed v1;
+		struct {
+			u32 buffer_addr;
+			/* Misdocumented as number of pages! */
+			u16 num_bytes;
+			u16 start;
+			u16 end;
+		} __packed v2;
+	};
+} __packed;
+
+static char *memconsole_baseaddr;
+static size_t memconsole_length;
+
+static ssize_t memconsole_read(struct file *filp, struct kobject *kobp,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t pos, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &pos, memconsole_baseaddr,
+				       memconsole_length);
+}
+
+static struct bin_attribute memconsole_bin_attr = {
+	.attr = {.name = "log", .mode = 0444},
+	.read = memconsole_read,
+};
+
+
+/* v1 header: export the num_chars bytes that start at buffer_addr */
+static void found_v1_header(struct biosmemcon_ebda *hdr)
+{
+	printk(KERN_INFO "BIOS console v1 EBDA structure found at %p\n", hdr);
+	printk(KERN_INFO "BIOS console buffer at 0x%.8x, "
+	       "start = %d, end = %d, num = %d\n",
+	       hdr->v1.buffer_addr, hdr->v1.start,
+	       hdr->v1.end, hdr->v1.num_chars);
+	memconsole_baseaddr = phys_to_virt(hdr->v1.buffer_addr);
+	memconsole_length = hdr->v1.num_chars;
+}
+
+/* v2: export bytes [start, end) of the buffer, or nothing if end < start */
+static void found_v2_header(struct biosmemcon_ebda *hdr)
+{
+	printk(KERN_INFO "BIOS console v2 EBDA structure found at %p\n", hdr);
+	printk(KERN_INFO "BIOS console buffer at 0x%.8x, "
+	       "start = %d, end = %d, num_bytes = %d\n",
+	       hdr->v2.buffer_addr, hdr->v2.start,
+	       hdr->v2.end, hdr->v2.num_bytes);
+	memconsole_length = hdr->v2.end > hdr->v2.start ?
+			    hdr->v2.end - hdr->v2.start : 0;
+	memconsole_baseaddr = phys_to_virt(hdr->v2.buffer_addr + hdr->v2.start);
+}
+
+/*
+ * Search through the EBDA for the BIOS Memory Console, and
+ * set the global variables to point to it.  Return true if found.
+ */
+static bool found_memconsole(void)
+{
+	unsigned int address;
+	size_t length, cur;
+
+	address = get_bios_ebda();
+	if (!address) {
+		printk(KERN_INFO "BIOS EBDA non-existent.\n");
+		return false;
+	}
+
+	/* EBDA length is byte 0 of EBDA (in KB) */
+	length = *(u8 *)phys_to_virt(address);
+	length <<= 10; /* convert to bytes */
+
+	/*
+	 * Search the EBDA wherever the whole signature still fits inside it
+	 * note: signature is not necessarily dword-aligned
+	 */
+	for (cur = 0; cur + sizeof(u32) <= length; cur++) {
+		struct biosmemcon_ebda *hdr = phys_to_virt(address + cur);
+
+		/* memconsole v1 */
+		if (hdr->signature == BIOS_MEMCONSOLE_V1_MAGIC) {
+			found_v1_header(hdr);
+			return true;
+		}
+
+		/* memconsole v2 */
+		if (hdr->signature == BIOS_MEMCONSOLE_V2_MAGIC) {
+			found_v2_header(hdr);
+			return true;
+		}
+	}
+
+	printk(KERN_INFO "BIOS console EBDA structure not found!\n");
+	return false;
+}
+
+static struct dmi_system_id memconsole_dmi_table[] __initdata = {
+	{
+		.ident = "Google Board",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Google, Inc."),
+		},
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(dmi, memconsole_dmi_table);
+
+static int __init memconsole_init(void)
+{
+	/* bail out unless the DMI board vendor matches the table */
+	if (!dmi_check_system(memconsole_dmi_table))
+		return -ENODEV;
+
+	/* locate the console; this fills in the base address and length */
+	if (!found_memconsole())
+		return -ENODEV;
+
+	/* the sysfs file is exactly as large as the console contents */
+	memconsole_bin_attr.size = memconsole_length;
+
+	/* hand back whatever sysfs reports for the file creation */
+	return sysfs_create_bin_file(firmware_kobj, &memconsole_bin_attr);
+}
+
+static void __exit memconsole_exit(void)
+{
+	sysfs_remove_bin_file(firmware_kobj, &memconsole_bin_attr);
+}
+
+module_init(memconsole_init);
+module_exit(memconsole_exit);
+
+MODULE_AUTHOR("Google, Inc.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 2192456dfd68..f032e446fc11 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -42,7 +42,20 @@
 struct acpi_table_ibft *ibft_addr;
 EXPORT_SYMBOL_GPL(ibft_addr);
 
-#define IBFT_SIGN "iBFT"
+static const struct {
+	char *sign;
+} ibft_signs[] = {
+#ifdef CONFIG_ACPI
+	/*
+	 * One spec says "IBFT", the other says "iBFT". We have to check
+	 * for both.
+	 */
+	{ ACPI_SIG_IBFT },
+#endif
+	{ "iBFT" },
+	{ "BIFT" },	/* Broadcom iSCSI Offload */
+};
+
 #define IBFT_SIGN_LEN 4
 #define IBFT_START 0x80000 /* 512kB */
 #define IBFT_END 0x100000 /* 1MB */
@@ -62,6 +75,7 @@ static int __init find_ibft_in_mem(void)
 	unsigned long pos;
 	unsigned int len = 0;
 	void *virt;
+	int i;
 
 	for (pos = IBFT_START; pos < IBFT_END; pos += 16) {
 		/* The table can't be inside the VGA BIOS reserved space,
@@ -69,18 +83,23 @@ static int __init find_ibft_in_mem(void)
 		if (pos == VGA_MEM)
 			pos += VGA_SIZE;
 		virt = isa_bus_to_virt(pos);
-		if (memcmp(virt, IBFT_SIGN, IBFT_SIGN_LEN) == 0) {
-			unsigned long *addr =
-			    (unsigned long *)isa_bus_to_virt(pos + 4);
-			len = *addr;
-			/* if the length of the table extends past 1M,
-			 * the table cannot be valid. */
-			if (pos + len <= (IBFT_END-1)) {
-				ibft_addr = (struct acpi_table_ibft *)virt;
-				break;
+
+		for (i = 0; i < ARRAY_SIZE(ibft_signs); i++) {
+			if (memcmp(virt, ibft_signs[i].sign, IBFT_SIGN_LEN) ==
+			    0) {
+				unsigned long *addr =
+				    (unsigned long *)isa_bus_to_virt(pos + 4);
+				len = *addr;
+				/* if the length of the table extends past 1M,
+				 * the table cannot be valid. */
+				if (pos + len <= (IBFT_END-1)) {
+					ibft_addr = (struct acpi_table_ibft *)virt;
+					goto done;
+				}
 			}
 		}
 	}
+done:
 	return len;
 }
 /*
@@ -89,18 +108,12 @@ static int __init find_ibft_in_mem(void)
  */
 unsigned long __init find_ibft_region(unsigned long *sizep)
 {
-
+	int i;
 	ibft_addr = NULL;
 
 #ifdef CONFIG_ACPI
-	/*
-	 * One spec says "IBFT", the other says "iBFT". We have to check
-	 * for both.
-	 */
-	if (!ibft_addr)
-		acpi_table_parse(ACPI_SIG_IBFT, acpi_find_ibft);
-	if (!ibft_addr)
-		acpi_table_parse(IBFT_SIGN, acpi_find_ibft);
+	for (i = 0; i < ARRAY_SIZE(ibft_signs) && !ibft_addr; i++)
+		acpi_table_parse(ibft_signs[i].sign, acpi_find_ibft);
 #endif /* CONFIG_ACPI */
 
 	/* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index 7f6f01a4b145..0a775f7987c2 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -116,6 +116,7 @@ static int ioh_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
 		reg_val |= (1 << nr);
 	else
 		reg_val &= ~(1 << nr);
+	iowrite32(reg_val, &chip->reg->regs[chip->ch].po);
 
 	mutex_unlock(&chip->lock);
 
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 583e92592073..7630ab7b9bec 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -558,7 +558,7 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 
 	ret = gpiochip_add(&chip->gpio_chip);
 	if (ret)
-		goto out_failed;
+		goto out_failed_irq;
 
 	if (pdata->setup) {
 		ret = pdata->setup(client, chip->gpio_chip.base,
@@ -570,8 +570,9 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, chip);
 	return 0;
 
-out_failed:
+out_failed_irq:
 	pca953x_irq_teardown(chip);
+out_failed:
 	kfree(chip->dyn_pdata);
 	kfree(chip);
 	return ret;
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 2c6af8705103..f970a5f3585e 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -105,6 +105,7 @@ static int pch_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
 		reg_val |= (1 << nr);
 	else
 		reg_val &= ~(1 << nr);
+	iowrite32(reg_val, &chip->reg->po);
 
 	mutex_unlock(&chip->lock);
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index a6feb78c404c..b493663c7ba7 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -24,6 +24,7 @@ config DRM_KMS_HELPER
 	depends on DRM
 	select FB
 	select FRAMEBUFFER_CONSOLE if !EXPERT
+	select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
 	help
 	  FB and CRTC helpers for KMS drivers.
 
@@ -96,6 +97,7 @@ config DRM_I915
 	# i915 depends on ACPI_VIDEO when ACPI is enabled
 	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
 	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select VIDEO_OUTPUT_CONTROL if ACPI
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 950720473967..140b9525b48a 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -342,9 +342,22 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 }
 EXPORT_SYMBOL(drm_fb_helper_debug_leave);
 
+bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+{
+	bool error = false;
+	int i, ret;
+	for (i = 0; i < fb_helper->crtc_count; i++) {
+		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
+		ret = drm_crtc_helper_set_config(mode_set);
+		if (ret)
+			error = true;
+	}
+	return error;
+}
+EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode);
+
 bool drm_fb_helper_force_kernel_mode(void)
 {
-	int i = 0;
 	bool ret, error = false;
 	struct drm_fb_helper *helper;
 
@@ -352,12 +365,12 @@ bool drm_fb_helper_force_kernel_mode(void)
 		return false;
 
 	list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) {
-		for (i = 0; i < helper->crtc_count; i++) {
-			struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set;
-			ret = drm_crtc_helper_set_config(mode_set);
-			if (ret)
-				error = true;
-		}
+		if (helper->dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+			continue;
+
+		ret = drm_fb_helper_restore_fbdev_mode(helper);
+		if (ret)
+			error = true;
 	}
 	return error;
 }
@@ -1503,17 +1516,33 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel)
 }
 EXPORT_SYMBOL(drm_fb_helper_initial_config);
 
-bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
+/**
+ * drm_fb_helper_hotplug_event - respond to a hotplug notification by
+ *                               probing all the outputs attached to the fb.
+ * @fb_helper: the drm_fb_helper
+ *
+ * LOCKING:
+ * Called at runtime, must take mode config lock.
+ *
+ * Scan the connectors attached to the fb_helper and try to put together a
+ * setup after notification of a change in output configuration.
+ *
+ * RETURNS:
+ * 0 on success and a non-zero error code otherwise.
+ */
+int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 {
+	struct drm_device *dev = fb_helper->dev;
 	int count = 0;
 	u32 max_width, max_height, bpp_sel;
 	bool bound = false, crtcs_bound = false;
 	struct drm_crtc *crtc;
 
 	if (!fb_helper->fb)
-		return false;
+		return 0;
 
-	list_for_each_entry(crtc, &fb_helper->dev->mode_config.crtc_list, head) {
+	mutex_lock(&dev->mode_config.mutex);
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		if (crtc->fb)
 			crtcs_bound = true;
 		if (crtc->fb == fb_helper->fb)
@@ -1522,7 +1551,8 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 
 	if (!bound && crtcs_bound) {
 		fb_helper->delayed_hotplug = true;
-		return false;
+		mutex_unlock(&dev->mode_config.mutex);
+		return 0;
 	}
 	DRM_DEBUG_KMS("\n");
 
@@ -1533,6 +1563,7 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 	count = drm_fb_helper_probe_connector_modes(fb_helper, max_width,
 						    max_height);
 	drm_setup_crtcs(fb_helper);
+	mutex_unlock(&dev->mode_config.mutex);
 
 	return drm_fb_helper_single_fb_probe(fb_helper, bpp_sel);
 }
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 741457bd1c46..a1f12cb043de 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -932,11 +932,34 @@ EXPORT_SYMBOL(drm_vblank_put);
 
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
+	struct drm_pending_vblank_event *e, *t;
+	struct timeval now;
 	unsigned long irqflags;
+	unsigned int seq;
 
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
 	vblank_disable_and_save(dev, crtc);
 	DRM_WAKEUP(&dev->vbl_queue[crtc]);
+
+	/* Send this crtc's queued vblank events now so waiters don't hang */
+	seq = drm_vblank_count_and_time(dev, crtc, &now);
+	list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) {
+		if (e->pipe != crtc)
+			continue;
+		DRM_DEBUG("Sending premature vblank event on disable: "
+			  "wanted %d, current %d\n",
+			  e->event.sequence, seq);
+
+		e->event.sequence = seq;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+		drm_vblank_put(dev, e->pipe);
+		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+		trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
+						 e->event.sequence);
+	}
+
 	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_vblank_off);
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 5d00b0fc0d91..959186cbf328 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(drm_mm_search_free_in_range);
 void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	list_replace(&old->node_list, &new->node_list);
-	list_replace(&old->node_list, &new->hole_stack);
+	list_replace(&old->hole_stack, &new->hole_stack);
 	new->hole_follows = old->hole_follows;
 	new->mm = old->mm;
 	new->start = old->start;
@@ -699,8 +699,8 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm)
 				entry->size);
 		total_used += entry->size;
 		if (entry->hole_follows) {
-			hole_start = drm_mm_hole_node_start(&mm->head_node);
-			hole_end = drm_mm_hole_node_end(&mm->head_node);
+			hole_start = drm_mm_hole_node_start(entry);
+			hole_end = drm_mm_hole_node_end(entry);
 			hole_size = hole_end - hole_start;
 			seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: free\n",
 					hole_start, hole_end, hole_size);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 72730377a01b..12876f2795d2 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2207,7 +2207,7 @@ void i915_driver_lastclose(struct drm_device * dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) {
-		drm_fb_helper_restore();
+		intel_fb_restore_mode(dev);
 		vga_switcheroo_process_delayed_switch();
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd31d02..32d1b3e829c8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,7 +49,7 @@ module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600);
 unsigned int i915_powersave = 1;
 module_param_named(powersave, i915_powersave, int, 0600);
 
-unsigned int i915_semaphores = 1;
+unsigned int i915_semaphores = 0;
 module_param_named(semaphores, i915_semaphores, int, 0600);
 
 unsigned int i915_enable_rc6 = 0;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04c6bff..2166ee071ddb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3771,8 +3771,11 @@ static bool g4x_compute_wm0(struct drm_device *dev,
 	int entries, tlb_miss;
 
 	crtc = intel_get_crtc_for_plane(dev, plane);
-	if (crtc->fb == NULL || !crtc->enabled)
+	if (crtc->fb == NULL || !crtc->enabled) {
+		*cursor_wm = cursor->guard_size;
+		*plane_wm = display->guard_size;
 		return false;
+	}
 
 	htotal = crtc->mode.htotal;
 	hdisplay = crtc->mode.hdisplay;
@@ -5602,9 +5605,9 @@ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc)
 	intel_clock_t clock;
 
 	if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0)
-		fp = FP0(pipe);
+		fp = I915_READ(FP0(pipe));
 	else
-		fp = FP1(pipe);
+		fp = I915_READ(FP1(pipe));
 
 	clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT;
 	if (IS_PINEVIEW(dev)) {
@@ -6215,36 +6218,6 @@ cleanup_work:
 	return ret;
 }
 
-static void intel_crtc_reset(struct drm_crtc *crtc)
-{
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-
-	/* Reset flags back to the 'unknown' status so that they
-	 * will be correctly set on the initial modeset.
-	 */
-	intel_crtc->dpms_mode = -1;
-}
-
-static struct drm_crtc_helper_funcs intel_helper_funcs = {
-	.dpms = intel_crtc_dpms,
-	.mode_fixup = intel_crtc_mode_fixup,
-	.mode_set = intel_crtc_mode_set,
-	.mode_set_base = intel_pipe_set_base,
-	.mode_set_base_atomic = intel_pipe_set_base_atomic,
-	.load_lut = intel_crtc_load_lut,
-	.disable = intel_crtc_disable,
-};
-
-static const struct drm_crtc_funcs intel_crtc_funcs = {
-	.reset = intel_crtc_reset,
-	.cursor_set = intel_crtc_cursor_set,
-	.cursor_move = intel_crtc_cursor_move,
-	.gamma_set = intel_crtc_gamma_set,
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = intel_crtc_destroy,
-	.page_flip = intel_crtc_page_flip,
-};
-
 static void intel_sanitize_modesetting(struct drm_device *dev,
 				       int pipe, int plane)
 {
@@ -6281,6 +6254,42 @@ static void intel_sanitize_modesetting(struct drm_device *dev,
 	intel_disable_pipe(dev_priv, pipe);
 }
 
+static void intel_crtc_reset(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	/* Reset flags back to the 'unknown' status so that they
+	 * will be correctly set on the initial modeset.
+	 */
+	intel_crtc->dpms_mode = -1;
+
+	/* We need to fix up any BIOS configuration that conflicts with
+	 * our expectations.
+	 */
+	intel_sanitize_modesetting(dev, intel_crtc->pipe, intel_crtc->plane);
+}
+
+static struct drm_crtc_helper_funcs intel_helper_funcs = {
+	.dpms = intel_crtc_dpms,
+	.mode_fixup = intel_crtc_mode_fixup,
+	.mode_set = intel_crtc_mode_set,
+	.mode_set_base = intel_pipe_set_base,
+	.mode_set_base_atomic = intel_pipe_set_base_atomic,
+	.load_lut = intel_crtc_load_lut,
+	.disable = intel_crtc_disable,
+};
+
+static const struct drm_crtc_funcs intel_crtc_funcs = {
+	.reset = intel_crtc_reset,
+	.cursor_set = intel_crtc_cursor_set,
+	.cursor_move = intel_crtc_cursor_move,
+	.gamma_set = intel_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = intel_crtc_destroy,
+	.page_flip = intel_crtc_page_flip,
+};
+
 static void intel_crtc_init(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -6330,8 +6339,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 
 	setup_timer(&intel_crtc->idle_timer, intel_crtc_idle_timer,
 		    (unsigned long)intel_crtc);
-
-	intel_sanitize_modesetting(dev, intel_crtc->pipe, intel_crtc->plane);
 }
 
 int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
@@ -6572,8 +6579,10 @@ intel_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOENT);
 
 	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
-	if (!intel_fb)
+	if (!intel_fb) {
+		drm_gem_object_unreference_unlocked(&obj->base);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index cb8578b7e443..a4d80314e7f8 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1470,7 +1470,8 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 
 	if (!HAS_PCH_CPT(dev) &&
 	    I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
-		struct intel_crtc *intel_crtc = to_intel_crtc(intel_dp->base.base.crtc);
+		struct drm_crtc *crtc = intel_dp->base.base.crtc;
+
 		/* Hardware workaround: leaving our transcoder select
 		 * set to transcoder B while it's off will prevent the
 		 * corresponding HDMI output on transcoder A.
@@ -1485,7 +1486,19 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 		/* Changes to enable or select take place the vblank
 		 * after being written.
 		 */
-		intel_wait_for_vblank(dev, intel_crtc->pipe);
+		if (crtc == NULL) {
+			/* We can arrive here never having been attached
+			 * to a CRTC, for instance, due to inheriting
+			 * random state from the BIOS.
+			 *
+			 * If the pipe is not running, play safe and
+			 * wait for the clocks to stabilise before
+			 * continuing.
+			 */
+			POSTING_READ(intel_dp->output_reg);
+			msleep(50);
+		} else
+			intel_wait_for_vblank(dev, to_intel_crtc(crtc)->pipe);
 	}
 
 	I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f5b0d8306d83..1d20712d527f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -338,4 +338,5 @@ extern int intel_overlay_attrs(struct drm_device *dev, void *data,
 			       struct drm_file *file_priv);
 
 extern void intel_fb_output_poll_changed(struct drm_device *dev);
+extern void intel_fb_restore_mode(struct drm_device *dev);
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 512782728e51..ec49bae73382 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -264,3 +264,13 @@ void intel_fb_output_poll_changed(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper);
 }
+
+void intel_fb_restore_mode(struct drm_device *dev)
+{
+	int ret;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	ret = drm_fb_helper_restore_fbdev_mode(&dev_priv->fbdev->helper);
+	if (ret)
+		DRM_DEBUG("failed to restore crtc mode\n");
+}
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index a562bd2648c7..67cb076d271b 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -539,6 +539,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_connector *connector = dev_priv->int_lvds_connector;
 
+	if (dev->switch_power_state != DRM_SWITCH_POWER_ON)
+		return NOTIFY_OK;
+
 	/*
 	 * check and update the status of LVDS connector after receiving
 	 * the LID nofication event.
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 4256b8ef3947..6b22c1dcc015 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1151,10 +1151,10 @@ intel_tv_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 			    (video_levels->blank << TV_BLANK_LEVEL_SHIFT)));
 	{
 		int pipeconf_reg = PIPECONF(pipe);
-		int dspcntr_reg = DSPCNTR(pipe);
+		int dspcntr_reg = DSPCNTR(intel_crtc->plane);
 		int pipeconf = I915_READ(pipeconf_reg);
 		int dspcntr = I915_READ(dspcntr_reg);
-		int dspbase_reg = DSPADDR(pipe);
+		int dspbase_reg = DSPADDR(intel_crtc->plane);
 		int xpos = 0x0, ypos = 0x0;
 		unsigned int xsize, ysize;
 		/* Pipe must be off here */
@@ -1378,7 +1378,9 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 	if (type < 0)
 		return connector_status_disconnected;
 
+	intel_tv->type = type;
 	intel_tv_find_better_format(connector);
+
 	return connector_status_connected;
 }
 
@@ -1670,8 +1672,7 @@ intel_tv_init(struct drm_device *dev)
 	 *
 	 * More recent chipsets favour HDMI rather than integrated S-Video.
 	 */
-	connector->polled =
-		DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 
 	drm_connector_init(dev, connector, &intel_tv_connector_funcs,
 			   DRM_MODE_CONNECTOR_SVIDEO);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 8314a49b6b9a..90aef64b76f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -269,7 +269,7 @@ struct init_tbl_entry {
 	int (*handler)(struct nvbios *, uint16_t, struct init_exec *);
 };
 
-static int parse_init_table(struct nvbios *, unsigned int, struct init_exec *);
+static int parse_init_table(struct nvbios *, uint16_t, struct init_exec *);
 
 #define MACRO_INDEX_SIZE	2
 #define MACRO_SIZE		8
@@ -2011,6 +2011,27 @@ init_sub_direct(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 }
 
 static int
+init_jump(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
+{
+	/*
+	 * INIT_JUMP   opcode: 0x5C ('\')
+	 *
+	 * offset      (8  bit): opcode
+	 * offset + 1  (16 bit): offset (in bios)
+	 *
+	 * Continue execution of init table from 'offset'
+	 */
+
+	uint16_t jmp_offset = ROM16(bios->data[offset + 1]);
+
+	if (!iexec->execute)
+		return 3;
+
+	BIOSLOG(bios, "0x%04X: Jump to 0x%04X\n", offset, jmp_offset);
+	return jmp_offset - offset;
+}
+
+static int
 init_i2c_if(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -3659,6 +3680,7 @@ static struct init_tbl_entry itbl_entry[] = {
 	{ "INIT_ZM_REG_SEQUENCE"              , 0x58, init_zm_reg_sequence            },
 	/* INIT_INDIRECT_REG (0x5A, 7, 0, 0) removed due to no example of use */
 	{ "INIT_SUB_DIRECT"                   , 0x5B, init_sub_direct                 },
+	{ "INIT_JUMP"                         , 0x5C, init_jump                       },
 	{ "INIT_I2C_IF"                       , 0x5E, init_i2c_if                     },
 	{ "INIT_COPY_NV_REG"                  , 0x5F, init_copy_nv_reg                },
 	{ "INIT_ZM_INDEX_IO"                  , 0x62, init_zm_index_io                },
@@ -3700,8 +3722,7 @@ static struct init_tbl_entry itbl_entry[] = {
 #define MAX_TABLE_OPS 1000
 
 static int
-parse_init_table(struct nvbios *bios, unsigned int offset,
-		 struct init_exec *iexec)
+parse_init_table(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
 	 * Parses all commands in an init table.
@@ -6333,6 +6354,32 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
 		}
 	}
 
+	/* XFX GT-240X-YA
+	 *
+	 * So many things wrong here, replace the entire encoder table..
+	 */
+	if (nv_match_device(dev, 0x0ca3, 0x1682, 0x3003)) {
+		if (idx == 0) {
+			*conn = 0x02001300; /* VGA, connector 1 */
+			*conf = 0x00000028;
+		} else
+		if (idx == 1) {
+			*conn = 0x01010312; /* DVI, connector 0 */
+			*conf = 0x00020030;
+		} else
+		if (idx == 2) {
+			*conn = 0x01010310; /* VGA, connector 0 */
+			*conf = 0x00000028;
+		} else
+		if (idx == 3) {
+			*conn = 0x02022362; /* HDMI, connector 2 */
+			*conf = 0x00020010;
+		} else {
+			*conn = 0x0000000e; /* EOL */
+			*conf = 0x00000000;
+		}
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index ce38e97b9428..568caedd7216 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -83,7 +83,7 @@ nouveau_dma_init(struct nouveau_channel *chan)
 		return ret;
 
 	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
-	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfd0, 0x1000,
+	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfe0, 0x1000,
 				     &chan->m2mf_ntfy);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 57e5302503db..a76514a209b3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -682,6 +682,9 @@ struct drm_nouveau_private {
 	/* For PFIFO and PGRAPH. */
 	spinlock_t context_switch_lock;
 
+	/* VM/PRAMIN flush, legacy PRAMIN aperture */
+	spinlock_t vm_lock;
+
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
 	struct nouveau_ramht  *ramht;
 	struct nouveau_gpuobj *ramfc;
@@ -1190,7 +1193,7 @@ extern int  nv50_graph_load_context(struct nouveau_channel *);
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern int  nv50_grctx_init(struct nouveau_grctx *);
 extern void nv50_graph_tlb_flush(struct drm_device *dev);
-extern void nv86_graph_tlb_flush(struct drm_device *dev);
+extern void nv84_graph_tlb_flush(struct drm_device *dev);
 extern struct nouveau_enum nv50_data_error_names[];
 
 /* nvc0_graph.c */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 889c4454682e..39aee6d4daf8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -181,13 +181,13 @@ nouveau_fbcon_sync(struct fb_info *info)
 		OUT_RING  (chan, 0);
 	}
 
-	nouveau_bo_wr32(chan->notifier_bo, chan->m2mf_ntfy + 3, 0xffffffff);
+	nouveau_bo_wr32(chan->notifier_bo, chan->m2mf_ntfy/4 + 3, 0xffffffff);
 	FIRE_RING(chan);
 	mutex_unlock(&chan->mutex);
 
 	ret = -EBUSY;
 	for (i = 0; i < 100000; i++) {
-		if (!nouveau_bo_rd32(chan->notifier_bo, chan->m2mf_ntfy + 3)) {
+		if (!nouveau_bo_rd32(chan->notifier_bo, chan->m2mf_ntfy/4 + 3)) {
 			ret = 0;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 2683377f4131..c3e953b08992 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -152,8 +152,6 @@ nouveau_mem_vram_fini(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	nouveau_bo_ref(NULL, &dev_priv->vga_ram);
-
 	ttm_bo_device_release(&dev_priv->ttm.bdev);
 
 	nouveau_ttm_global_release(dev_priv);
@@ -398,7 +396,7 @@ nouveau_mem_vram_init(struct drm_device *dev)
 			dma_bits = 40;
 	} else
 	if (drm_pci_device_is_pcie(dev) &&
-	    dev_priv->chipset != 0x40 &&
+	    dev_priv->chipset  > 0x40 &&
 	    dev_priv->chipset != 0x45) {
 		if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(39)))
 			dma_bits = 39;
@@ -552,6 +550,7 @@ nouveau_mem_timing_init(struct drm_device *dev)
 	u8 tRC;		/* Byte 9 */
 	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
 	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
+	u8 magic_number = 0; /* Yeah... sorry*/
 	u8 *mem = NULL, *entry;
 	int i, recordlen, entries;
 
@@ -596,6 +595,12 @@ nouveau_mem_timing_init(struct drm_device *dev)
 	if (!memtimings->timing)
 		return;
 
+	/* Get "some number" from the timing reg for NV_40
+	 * Used in calculations later */
+	if(dev_priv->card_type == NV_40) {
+		magic_number = (nv_rd32(dev,0x100228) & 0x0f000000) >> 24;
+	}
+
 	entry = mem + mem[1];
 	for (i = 0; i < entries; i++, entry += recordlen) {
 		struct nouveau_pm_memtiming *timing = &pm->memtimings.timing[i];
@@ -635,36 +640,51 @@ nouveau_mem_timing_init(struct drm_device *dev)
 
 		/* XXX: I don't trust the -1's and +1's... they must come
 		 *      from somewhere! */
-		timing->reg_100224 = ((tUNK_0 + tUNK_19 + 1) << 24 |
+		timing->reg_100224 = (tUNK_0 + tUNK_19 + 1 + magic_number) << 24 |
 				      tUNK_18 << 16 |
-				      (tUNK_1 + tUNK_19 + 1) << 8 |
-				      (tUNK_2 - 1));
+				      (tUNK_1 + tUNK_19 + 1 + magic_number) << 8;
+		if(dev_priv->chipset == 0xa8) {
+			timing->reg_100224 |= (tUNK_2 - 1);
+		} else {
+			timing->reg_100224 |= (tUNK_2 + 2 - magic_number);
+		}
 
 		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
-		if(recordlen > 19) {
-			timing->reg_100228 += (tUNK_19 - 1) << 24;
-		}/* I cannot back-up this else-statement right now
-			 else {
-			timing->reg_100228 += tUNK_12 << 24;
-		}*/
-
-		/* XXX: reg_10022c */
-		timing->reg_10022c = tUNK_2 - 1;
-
-		timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
-				      tUNK_13 << 8  | tUNK_13);
-
-		/* XXX: +6? */
-		timing->reg_100234 = (tRAS << 24 | (tUNK_19 + 6) << 8 | tRC);
-		timing->reg_100234 += max(tUNK_10,tUNK_11) << 16;
-
-		/* XXX; reg_100238, reg_10023c
-		 * reg: 0x00??????
-		 * reg_10023c:
-		 *      0 for pre-NV50 cards
-		 *      0x????0202 for NV50+ cards (empirical evidence) */
-		if(dev_priv->card_type >= NV_50) {
+		if(dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa) {
+			timing->reg_100228 |= (tUNK_19 - 1) << 24;
+		}
+
+		if(dev_priv->card_type == NV_40) {
+			/* NV40: don't know what the rest of the regs are..
+			 * And don't need to know either */
+			timing->reg_100228 |= 0x20200000 | magic_number << 24;
+		} else if(dev_priv->card_type >= NV_50) {
+			/* XXX: reg_10022c */
+			timing->reg_10022c = tUNK_2 - 1;
+
+			timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
+						  tUNK_13 << 8  | tUNK_13);
+
+			timing->reg_100234 = (tRAS << 24 | tRC);
+			timing->reg_100234 += max(tUNK_10,tUNK_11) << 16;
+
+			if(dev_priv->chipset < 0xa3) {
+				timing->reg_100234 |= (tUNK_2 + 2) << 8;
+			} else {
+				/* XXX: +6? */
+				timing->reg_100234 |= (tUNK_19 + 6) << 8;
+			}
+
+			/* XXX; reg_100238, reg_10023c
+			 * reg_100238: 0x00??????
+			 * reg_10023c: 0x!!??0202 for NV50+ cards (empirical evidence) */
 			timing->reg_10023c = 0x202;
+			if(dev_priv->chipset < 0xa3) {
+				timing->reg_10023c |= 0x4000000 | (tUNK_2 - 1) << 16;
+			} else {
+				/* currently unknown
+				 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
+			}
 		}
 
 		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
@@ -675,7 +695,7 @@ nouveau_mem_timing_init(struct drm_device *dev)
 			 timing->reg_100238, timing->reg_10023c);
 	}
 
-	memtimings->nr_timing  = entries;
+	memtimings->nr_timing = entries;
 	memtimings->supported = true;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index 7ba3fc0b30c1..5b39718ae1f8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -35,19 +35,22 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct nouveau_bo *ntfy = NULL;
-	uint32_t flags;
+	uint32_t flags, ttmpl;
 	int ret;
 
-	if (nouveau_vram_notify)
+	if (nouveau_vram_notify) {
 		flags = NOUVEAU_GEM_DOMAIN_VRAM;
-	else
+		ttmpl = TTM_PL_FLAG_VRAM;
+	} else {
 		flags = NOUVEAU_GEM_DOMAIN_GART;
+		ttmpl = TTM_PL_FLAG_TT;
+	}
 
 	ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags, 0, 0, &ntfy);
 	if (ret)
 		return ret;
 
-	ret = nouveau_bo_pin(ntfy, flags);
+	ret = nouveau_bo_pin(ntfy, ttmpl);
 	if (ret)
 		goto out_err;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 4f00c87ed86e..67a16e01ffa6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -1039,19 +1039,20 @@ nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
 {
 	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 	struct drm_device *dev = gpuobj->dev;
+	unsigned long flags;
 
 	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
 		u64  ptr = gpuobj->vinst + offset;
 		u32 base = ptr >> 16;
 		u32  val;
 
-		spin_lock(&dev_priv->ramin_lock);
+		spin_lock_irqsave(&dev_priv->vm_lock, flags);
 		if (dev_priv->ramin_base != base) {
 			dev_priv->ramin_base = base;
 			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
 		}
 		val = nv_rd32(dev, 0x700000 + (ptr & 0xffff));
-		spin_unlock(&dev_priv->ramin_lock);
+		spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 		return val;
 	}
 
@@ -1063,18 +1064,19 @@ nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
 {
 	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 	struct drm_device *dev = gpuobj->dev;
+	unsigned long flags;
 
 	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
 		u64  ptr = gpuobj->vinst + offset;
 		u32 base = ptr >> 16;
 
-		spin_lock(&dev_priv->ramin_lock);
+		spin_lock_irqsave(&dev_priv->vm_lock, flags);
 		if (dev_priv->ramin_base != base) {
 			dev_priv->ramin_base = base;
 			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
 		}
 		nv_wr32(dev, 0x700000 + (ptr & 0xffff), val);
-		spin_unlock(&dev_priv->ramin_lock);
+		spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index ac62a1b8c4fc..670e3cb697ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -134,7 +134,7 @@ nouveau_perf_init(struct drm_device *dev)
 		case 0x13:
 		case 0x15:
 			perflvl->fanspeed = entry[55];
-			perflvl->voltage = entry[56];
+			perflvl->voltage = (recordlen > 56) ? entry[56] : 0;
 			perflvl->core = ROM32(entry[1]) * 10;
 			perflvl->memory = ROM32(entry[5]) * 20;
 			break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index a33fe4019286..c77111eca6ac 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -42,7 +42,8 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
 
 	nvbe->nr_pages = 0;
 	while (num_pages--) {
-		if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
+		/* this code path isn't called and is incorrect anyways */
+		if (0) { /*dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE)*/
 			nvbe->pages[nvbe->nr_pages] =
 					dma_addrs[nvbe->nr_pages];
 		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
@@ -55,6 +56,7 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
 				be->func->clear(be);
 				return -EFAULT;
 			}
+			nvbe->ttm_alloced[nvbe->nr_pages] = false;
 		}
 
 		nvbe->nr_pages++;
@@ -427,7 +429,7 @@ nouveau_sgdma_init(struct drm_device *dev)
 	u32 aper_size, align;
 	int ret;
 
-	if (dev_priv->card_type >= NV_50 || drm_pci_device_is_pcie(dev))
+	if (dev_priv->card_type >= NV_40 && drm_pci_device_is_pcie(dev))
 		aper_size = 512 * 1024 * 1024;
 	else
 		aper_size = 64 * 1024 * 1024;
@@ -457,7 +459,7 @@ nouveau_sgdma_init(struct drm_device *dev)
 		dev_priv->gart_info.func = &nv50_sgdma_backend;
 	} else
 	if (drm_pci_device_is_pcie(dev) &&
-	    dev_priv->chipset != 0x40 && dev_priv->chipset != 0x45) {
+	    dev_priv->chipset > 0x40 && dev_priv->chipset != 0x45) {
 		if (nv44_graph_class(dev)) {
 			dev_priv->gart_info.func = &nv44_sgdma_backend;
 			align = 512 * 1024;
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 5bb2859001e2..915fbce89595 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -376,15 +376,11 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.destroy_context	= nv50_graph_destroy_context;
 		engine->graph.load_context	= nv50_graph_load_context;
 		engine->graph.unload_context	= nv50_graph_unload_context;
-		if (dev_priv->chipset != 0x86)
+		if (dev_priv->chipset == 0x50 ||
+		    dev_priv->chipset == 0xac)
 			engine->graph.tlb_flush	= nv50_graph_tlb_flush;
-		else {
-			/* from what i can see nvidia do this on every
-			 * pre-NVA3 board except NVAC, but, we've only
-			 * ever seen problems on NV86
-			 */
-			engine->graph.tlb_flush	= nv86_graph_tlb_flush;
-		}
+		else
+			engine->graph.tlb_flush	= nv84_graph_tlb_flush;
 		engine->fifo.channels		= 128;
 		engine->fifo.init		= nv50_fifo_init;
 		engine->fifo.takedown		= nv50_fifo_takedown;
@@ -612,6 +608,7 @@ nouveau_card_init(struct drm_device *dev)
 	spin_lock_init(&dev_priv->channels.lock);
 	spin_lock_init(&dev_priv->tile.lock);
 	spin_lock_init(&dev_priv->context_switch_lock);
+	spin_lock_init(&dev_priv->vm_lock);
 
 	/* Make the CRTCs and I2C buses accessible */
 	ret = engine->display.early_init(dev);
@@ -771,6 +768,11 @@ static void nouveau_card_takedown(struct drm_device *dev)
 	engine->mc.takedown(dev);
 	engine->display.late_takedown(dev);
 
+	if (dev_priv->vga_ram) {
+		nouveau_bo_unpin(dev_priv->vga_ram);
+		nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+	}
+
 	mutex_lock(&dev->struct_mutex);
 	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT);
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index c82db37d9f41..12098bf839c4 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -581,12 +581,13 @@ static void nv04_dfp_restore(struct drm_encoder *encoder)
 	int head = nv_encoder->restore.head;
 
 	if (nv_encoder->dcb->type == OUTPUT_LVDS) {
-		struct drm_display_mode *native_mode = nouveau_encoder_connector_get(nv_encoder)->native_mode;
-		if (native_mode)
-			call_lvds_script(dev, nv_encoder->dcb, head, LVDS_PANEL_ON,
-					 native_mode->clock);
-		else
-			NV_ERROR(dev, "Not restoring LVDS without native mode\n");
+		struct nouveau_connector *connector =
+			nouveau_encoder_connector_get(nv_encoder);
+
+		if (connector && connector->native_mode)
+			call_lvds_script(dev, nv_encoder->dcb, head,
+					 LVDS_PANEL_ON,
+					 connector->native_mode->clock);
 
 	} else if (nv_encoder->dcb->type == OUTPUT_TMDS) {
 		int clock = nouveau_hw_pllvals_to_clk
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 2b9984027f41..a19ccaa025b3 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -469,9 +469,6 @@ nv50_crtc_wait_complete(struct drm_crtc *crtc)
 
 	start = ptimer->read(dev);
 	do {
-		nv_wr32(dev, 0x61002c, 0x370);
-		nv_wr32(dev, 0x000140, 1);
-
 		if (nv_ro32(disp->ntfy, 0x000))
 			return 0;
 	} while (ptimer->read(dev) - start < 2000000000ULL);
diff --git a/drivers/gpu/drm/nouveau/nv50_evo.c b/drivers/gpu/drm/nouveau/nv50_evo.c
index a2cfaa691e9b..c8e83c1a4de8 100644
--- a/drivers/gpu/drm/nouveau/nv50_evo.c
+++ b/drivers/gpu/drm/nouveau/nv50_evo.c
@@ -186,6 +186,7 @@ nv50_evo_channel_init(struct nouveau_channel *evo)
 	nv_mask(dev, 0x610028, 0x00000000, 0x00010001 << id);
 
 	evo->dma.max = (4096/4) - 2;
+	evo->dma.max &= ~7;
 	evo->dma.put = 0;
 	evo->dma.cur = evo->dma.put;
 	evo->dma.free = evo->dma.max - evo->dma.cur;
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 8675b00caf18..b02a5b1e7d37 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -503,7 +503,7 @@ nv50_graph_tlb_flush(struct drm_device *dev)
 }
 
 void
-nv86_graph_tlb_flush(struct drm_device *dev)
+nv84_graph_tlb_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index a6f8aa651fc6..4f95a1e5822e 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -404,23 +404,25 @@ void
 nv50_instmem_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x00330c, 0x00000001);
 	if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
 		NV_ERROR(dev, "PRAMIN flush timeout\n");
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
 
 void
 nv84_instmem_flush(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x070000, 0x00000001);
 	if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
 		NV_ERROR(dev, "PRAMIN flush timeout\n");
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 4fd3432b5b8d..6c2694490741 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -174,10 +174,11 @@ void
 nv50_vm_flush_engine(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->ramin_lock);
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	nv_wr32(dev, 0x100c80, (engine << 16) | 1);
 	if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
 		NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
-	spin_unlock(&dev_priv->ramin_lock);
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c
index 69af0ba7edd3..a179e6c55afb 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vm.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vm.c
@@ -104,20 +104,27 @@ nvc0_vm_flush(struct nouveau_vm *vm)
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
 	struct drm_device *dev = vm->dev;
 	struct nouveau_vm_pgd *vpgd;
-	u32 r100c80, engine;
+	unsigned long flags;
+	u32 engine = (dev_priv->chan_vm == vm) ? 1 : 5;
 
 	pinstmem->flush(vm->dev);
 
-	if (vm == dev_priv->chan_vm)
-		engine = 1;
-	else
-		engine = 5;
-
+	spin_lock_irqsave(&dev_priv->vm_lock, flags);
 	list_for_each_entry(vpgd, &vm->pgd_list, head) {
-		r100c80 = nv_rd32(dev, 0x100c80);
+		/* looks like maybe a "free flush slots" counter, the
+		 * faster you write to 0x100cbc the more it decreases
+		 */
+		if (!nv_wait_ne(dev, 0x100c80, 0x00ff0000, 0x00000000)) {
+			NV_ERROR(dev, "vm timeout 0: 0x%08x %d\n",
+				 nv_rd32(dev, 0x100c80), engine);
+		}
 		nv_wr32(dev, 0x100cb8, vpgd->obj->vinst >> 8);
 		nv_wr32(dev, 0x100cbc, 0x80000000 | engine);
-		if (!nv_wait(dev, 0x100c80, 0xffffffff, r100c80))
-			NV_ERROR(dev, "vm flush timeout eng %d\n", engine);
+		/* wait for flush to be queued? */
+		if (!nv_wait(dev, 0x100c80, 0x00008000, 0x00008000)) {
+			NV_ERROR(dev, "vm timeout 1: 0x%08x %d\n",
+				 nv_rd32(dev, 0x100c80), engine);
+		}
 	}
+	spin_unlock_irqrestore(&dev_priv->vm_lock, flags);
 }
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 258fa5e7a2d9..7bd745689097 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -32,6 +32,7 @@
 #include "atom.h"
 #include "atom-names.h"
 #include "atom-bits.h"
+#include "radeon.h"
 
 #define ATOM_COND_ABOVE		0
 #define ATOM_COND_ABOVEOREQUAL	1
@@ -101,7 +102,9 @@ static void debug_print_spaces(int n)
 static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
 				 uint32_t index, uint32_t data)
 {
+	struct radeon_device *rdev = ctx->card->dev->dev_private;
 	uint32_t temp = 0xCDCDCDCD;
+
 	while (1)
 		switch (CU8(base)) {
 		case ATOM_IIO_NOP:
@@ -112,7 +115,8 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
 			base += 3;
 			break;
 		case ATOM_IIO_WRITE:
-			(void)ctx->card->ioreg_read(ctx->card, CU16(base + 1));
+			if (rdev->family == CHIP_RV515)
+				(void)ctx->card->ioreg_read(ctx->card, CU16(base + 1));
 			ctx->card->ioreg_write(ctx->card, CU16(base + 1), temp);
 			base += 3;
 			break;
@@ -131,7 +135,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
 		case ATOM_IIO_MOVE_INDEX:
 			temp &=
 			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
-			      CU8(base + 2));
+			      CU8(base + 3));
 			temp |=
 			    ((index >> CU8(base + 2)) &
 			     (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
@@ -141,7 +145,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
 		case ATOM_IIO_MOVE_DATA:
 			temp &=
 			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
-			      CU8(base + 2));
+			      CU8(base + 3));
 			temp |=
 			    ((data >> CU8(base + 2)) &
 			     (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
@@ -151,7 +155,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
 		case ATOM_IIO_MOVE_ATTR:
 			temp &=
 			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
-			      CU8(base + 2));
+			      CU8(base + 3));
 			temp |=
 			    ((ctx->
 			      io_attr >> CU8(base + 2)) & (0xFFFFFFFF >> (32 -
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index b41ec59c7100..529a3a704731 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -531,6 +531,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			pll->flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
 		else
 			pll->flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
+
+		if (rdev->family < CHIP_RV770)
+			pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
 	} else {
 		pll->flags |= RADEON_PLL_LEGACY;
 
@@ -559,7 +562,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 				if (ss_enabled) {
 					if (ss->refdiv) {
-						pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
 						pll->flags |= RADEON_PLL_USE_REF_DIV;
 						pll->reference_div = ss->refdiv;
 						if (ASIC_IS_AVIVO(rdev))
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 0b0cc74c08c0..9073e3bfb08c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -120,11 +120,16 @@ void evergreen_pm_misc(struct radeon_device *rdev)
 	struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx];
 	struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage;
 
-	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
-		if (voltage->voltage != rdev->pm.current_vddc) {
-			radeon_atom_set_voltage(rdev, voltage->voltage);
+	if (voltage->type == VOLTAGE_SW) {
+		if (voltage->voltage && (voltage->voltage != rdev->pm.current_vddc)) {
+			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
 			rdev->pm.current_vddc = voltage->voltage;
-			DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
+			DRM_DEBUG("Setting: vddc: %d\n", voltage->voltage);
+		}
+		if (voltage->vddci && (voltage->vddci != rdev->pm.current_vddci)) {
+			radeon_atom_set_voltage(rdev, voltage->vddci, SET_VOLTAGE_TYPE_ASIC_VDDCI);
+			rdev->pm.current_vddci = voltage->vddci;
+			DRM_DEBUG("Setting: vddci: %d\n", voltage->vddci);
 		}
 	}
 }
@@ -348,7 +353,7 @@ static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev,
 					struct drm_display_mode *mode,
 					struct drm_display_mode *other_mode)
 {
-	u32 tmp = 0;
+	u32 tmp;
 	/*
 	 * Line Buffer Setup
 	 * There are 3 line buffers, each one shared by 2 display controllers.
@@ -358,64 +363,63 @@ static u32 evergreen_line_buffer_adjust(struct radeon_device *rdev,
 	 * first display controller
 	 *  0 - first half of lb (3840 * 2)
 	 *  1 - first 3/4 of lb (5760 * 2)
-	 *  2 - whole lb (7680 * 2)
+	 *  2 - whole lb (7680 * 2), other crtc must be disabled
 	 *  3 - first 1/4 of lb (1920 * 2)
 	 * second display controller
 	 *  4 - second half of lb (3840 * 2)
 	 *  5 - second 3/4 of lb (5760 * 2)
-	 *  6 - whole lb (7680 * 2)
+	 *  6 - whole lb (7680 * 2), other crtc must be disabled
 	 *  7 - last 1/4 of lb (1920 * 2)
 	 */
-	if (mode && other_mode) {
-		if (mode->hdisplay > other_mode->hdisplay) {
-			if (mode->hdisplay > 2560)
-				tmp = 1; /* 3/4 */
-			else
-				tmp = 0; /* 1/2 */
-		} else if (other_mode->hdisplay > mode->hdisplay) {
-			if (other_mode->hdisplay > 2560)
-				tmp = 3; /* 1/4 */
-			else
-				tmp = 0; /* 1/2 */
-		} else
+	/* this can get tricky if we have two large displays on a paired group
+	 * of crtcs.  Ideally for multiple large displays we'd assign them to
+	 * non-linked crtcs for maximum line buffer allocation.
+	 */
+	if (radeon_crtc->base.enabled && mode) {
+		if (other_mode)
 			tmp = 0; /* 1/2 */
-	} else if (mode)
-		tmp = 2; /* whole */
-	else if (other_mode)
-		tmp = 3; /* 1/4 */
+		else
+			tmp = 2; /* whole */
+	} else
+		tmp = 0;
 
 	/* second controller of the pair uses second half of the lb */
 	if (radeon_crtc->crtc_id % 2)
 		tmp += 4;
 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, tmp);
 
-	switch (tmp) {
-	case 0:
-	case 4:
-	default:
-		if (ASIC_IS_DCE5(rdev))
-			return 4096 * 2;
-		else
-			return 3840 * 2;
-	case 1:
-	case 5:
-		if (ASIC_IS_DCE5(rdev))
-			return 6144 * 2;
-		else
-			return 5760 * 2;
-	case 2:
-	case 6:
-		if (ASIC_IS_DCE5(rdev))
-			return 8192 * 2;
-		else
-			return 7680 * 2;
-	case 3:
-	case 7:
-		if (ASIC_IS_DCE5(rdev))
-			return 2048 * 2;
-		else
-			return 1920 * 2;
+	if (radeon_crtc->base.enabled && mode) {
+		switch (tmp) {
+		case 0:
+		case 4:
+		default:
+			if (ASIC_IS_DCE5(rdev))
+				return 4096 * 2;
+			else
+				return 3840 * 2;
+		case 1:
+		case 5:
+			if (ASIC_IS_DCE5(rdev))
+				return 6144 * 2;
+			else
+				return 5760 * 2;
+		case 2:
+		case 6:
+			if (ASIC_IS_DCE5(rdev))
+				return 8192 * 2;
+			else
+				return 7680 * 2;
+		case 3:
+		case 7:
+			if (ASIC_IS_DCE5(rdev))
+				return 2048 * 2;
+			else
+				return 1920 * 2;
+		}
 	}
+
+	/* controller not enabled, so no lb used */
+	return 0;
 }
 
 static u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev)
@@ -858,9 +862,15 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
 		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
 		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
 		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
-	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
-	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
-	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	if (rdev->flags & RADEON_IS_IGP) {
+		WREG32(FUS_MC_VM_MD_L1_TLB0_CNTL, tmp);
+		WREG32(FUS_MC_VM_MD_L1_TLB1_CNTL, tmp);
+		WREG32(FUS_MC_VM_MD_L1_TLB2_CNTL, tmp);
+	} else {
+		WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+		WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+		WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	}
 	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
 	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
 	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
@@ -1770,7 +1780,10 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 
 
 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
-	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
+	if (rdev->flags & RADEON_IS_IGP)
+		mc_arb_ramcfg = RREG32(FUS_MC_ARB_RAMCFG);
+	else
+		mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
 
 	switch (rdev->config.evergreen.max_tile_pipes) {
 	case 1:
@@ -2576,7 +2589,7 @@ static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
 	u32 wptr, tmp;
 
 	if (rdev->wb.enabled)
-		wptr = rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4];
+		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
 	else
 		wptr = RREG32(IH_RB_WPTR);
 
@@ -2919,11 +2932,6 @@ static int evergreen_startup(struct radeon_device *rdev)
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
-	/* XXX: ontario has problems blitting to gart at the moment */
-	if (rdev->family == CHIP_PALM) {
-		rdev->asic->copy = NULL;
-		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	}
 
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
@@ -3036,9 +3044,6 @@ int evergreen_init(struct radeon_device *rdev)
 {
 	int r;
 
-	r = radeon_dummy_page_init(rdev);
-	if (r)
-		return r;
 	/* This don't do much */
 	r = radeon_gem_init(rdev);
 	if (r)
@@ -3150,7 +3155,6 @@ void evergreen_fini(struct radeon_device *rdev)
 	radeon_atombios_fini(rdev);
 	kfree(rdev->bios);
 	rdev->bios = NULL;
-	radeon_dummy_page_fini(rdev);
 }
 
 static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 9aaa3f0c9372..fc40e0cc3451 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -200,6 +200,7 @@
 #define		BURSTLENGTH_SHIFT				9
 #define		BURSTLENGTH_MASK				0x00000200
 #define		CHANSIZE_OVERRIDE				(1 << 11)
+#define	FUS_MC_ARB_RAMCFG				0x2768
 #define	MC_VM_AGP_TOP					0x2028
 #define	MC_VM_AGP_BOT					0x202C
 #define	MC_VM_AGP_BASE					0x2030
@@ -221,6 +222,11 @@
 #define	MC_VM_MD_L1_TLB0_CNTL				0x2654
 #define	MC_VM_MD_L1_TLB1_CNTL				0x2658
 #define	MC_VM_MD_L1_TLB2_CNTL				0x265C
+
+#define	FUS_MC_VM_MD_L1_TLB0_CNTL			0x265C
+#define	FUS_MC_VM_MD_L1_TLB1_CNTL			0x2660
+#define	FUS_MC_VM_MD_L1_TLB2_CNTL			0x2664
+
 #define	MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x203C
 #define	MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2038
 #define	MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2034
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 7aade20f63a8..3d8a7634bbe9 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -674,7 +674,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 
 	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
 	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
-	cgts_tcc_disable = RREG32(CGTS_TCC_DISABLE);
+	cgts_tcc_disable = 0xff000000;
 	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
 	gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG);
 	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
@@ -871,7 +871,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 
 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
 	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
-	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.evergreen.sx_num_of_sets);
+	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
 
 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
@@ -887,20 +887,20 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 
 	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
 
-	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_size / 4) - 1) |
-					POSITION_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_pos_size / 4) - 1) |
-					SMX_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_smx_size / 4) - 1)));
+	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
+					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
+					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
 
-	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.evergreen.sc_prim_fifo_size) |
-				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_hiz_tile_fifo_size) |
-				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.evergreen.sc_earlyz_tile_fifo_size)));
+	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
+				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
+				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
 
 
 	WREG32(VGT_NUM_INSTANCES, 1);
 
 	WREG32(CP_PERFMON_CNTL, 0);
 
-	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.evergreen.sq_num_cf_insts) |
+	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
 				  FETCH_FIFO_HIWATER(0x4) |
 				  DONE_FIFO_HIWATER(0xe0) |
 				  ALU_UPDATE_FIFO_HIWATER(0x8)));
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index be271c42de4d..6f27593901c7 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -587,7 +587,7 @@ void r600_pm_misc(struct radeon_device *rdev)
 
 	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
 		if (voltage->voltage != rdev->pm.current_vddc) {
-			radeon_atom_set_voltage(rdev, voltage->voltage);
+			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
 			rdev->pm.current_vddc = voltage->voltage;
 			DRM_DEBUG_DRIVER("Setting: v: %d\n", voltage->voltage);
 		}
@@ -2509,9 +2509,6 @@ int r600_init(struct radeon_device *rdev)
 {
 	int r;
 
-	r = radeon_dummy_page_init(rdev);
-	if (r)
-		return r;
 	if (r600_debugfs_mc_info_init(rdev)) {
 		DRM_ERROR("Failed to register debugfs file for mc !\n");
 	}
@@ -2625,7 +2622,6 @@ void r600_fini(struct radeon_device *rdev)
 	radeon_atombios_fini(rdev);
 	kfree(rdev->bios);
 	rdev->bios = NULL;
-	radeon_dummy_page_fini(rdev);
 }
 
 
@@ -3235,7 +3231,7 @@ static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
 	u32 wptr, tmp;
 
 	if (rdev->wb.enabled)
-		wptr = rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4];
+		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
 	else
 		wptr = RREG32(IH_RB_WPTR);
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 93f536594c73..ba643b576054 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -177,7 +177,7 @@ void radeon_pm_suspend(struct radeon_device *rdev);
 void radeon_pm_resume(struct radeon_device *rdev);
 void radeon_combios_get_power_modes(struct radeon_device *rdev);
 void radeon_atombios_get_power_modes(struct radeon_device *rdev);
-void radeon_atom_set_voltage(struct radeon_device *rdev, u16 level);
+void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
 void rs690_pm_info(struct radeon_device *rdev);
 extern int rv6xx_get_temp(struct radeon_device *rdev);
 extern int rv770_get_temp(struct radeon_device *rdev);
@@ -767,7 +767,9 @@ struct radeon_voltage {
 	u8 vddci_id; /* index into vddci voltage table */
 	bool vddci_enabled;
 	/* r6xx+ sw */
-	u32 voltage;
+	u16 voltage;
+	/* evergreen+ vddci */
+	u16 vddci;
 };
 
 /* clock mode flags */
@@ -835,10 +837,12 @@ struct radeon_pm {
 	int                     default_power_state_index;
 	u32                     current_sclk;
 	u32                     current_mclk;
-	u32                     current_vddc;
+	u16                     current_vddc;
+	u16                     current_vddci;
 	u32                     default_sclk;
 	u32                     default_mclk;
-	u32                     default_vddc;
+	u16                     default_vddc;
+	u16                     default_vddci;
 	struct radeon_i2c_chan *i2c_bus;
 	/* selected pm method */
 	enum radeon_pm_method     pm_method;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index eb888ee5f674..ca576191d058 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -94,7 +94,7 @@ static void radeon_register_accessor_init(struct radeon_device *rdev)
 		rdev->mc_rreg = &rs600_mc_rreg;
 		rdev->mc_wreg = &rs600_mc_wreg;
 	}
-	if ((rdev->family >= CHIP_R600) && (rdev->family <= CHIP_HEMLOCK)) {
+	if (rdev->family >= CHIP_R600) {
 		rdev->pciep_rreg = &r600_pciep_rreg;
 		rdev->pciep_wreg = &r600_pciep_wreg;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 99768d9d91da..90dfb2b8cf03 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -431,7 +431,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 		}
 	}
 
-	/* Acer laptop (Acer TravelMate 5730G) has an HDMI port
+	/* Acer laptop (Acer TravelMate 5730/5730G) has an HDMI port
 	 * on the laptop and a DVI port on the docking station and
 	 * both share the same encoder, hpd pin, and ddc line.
 	 * So while the bios table is technically correct,
@@ -440,7 +440,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 	 * with different crtcs which isn't possible on the hardware
 	 * side and leaves no crtcs for LVDS or VGA.
 	 */
-	if ((dev->pdev->device == 0x95c4) &&
+	if (((dev->pdev->device == 0x95c4) || (dev->pdev->device == 0x9591)) &&
 	    (dev->pdev->subsystem_vendor == 0x1025) &&
 	    (dev->pdev->subsystem_device == 0x013c)) {
 		if ((*connector_type == DRM_MODE_CONNECTOR_DVII) &&
@@ -1574,9 +1574,17 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct
 			ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
 			ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
 			bool bad_record = false;
-			u8 *record = (u8 *)(mode_info->atom_context->bios +
-					    data_offset +
-					    le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+			u8 *record;
+
+			if ((frev == 1) && (crev < 2))
+				/* absolute */
+				record = (u8 *)(mode_info->atom_context->bios +
+						le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+			else
+				/* relative */
+				record = (u8 *)(mode_info->atom_context->bios +
+						data_offset +
+						le16_to_cpu(lvds_info->info.usModePatchTableOffset));
 			while (*record != ATOM_RECORD_END_TYPE) {
 				switch (*record) {
 				case LCD_MODE_PATCH_RECORD_MODE_TYPE:
@@ -1599,9 +1607,10 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct
 							memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0],
 							       fake_edid_record->ucFakeEDIDLength);
 
-							if (drm_edid_is_valid(edid))
+							if (drm_edid_is_valid(edid)) {
 								rdev->mode_info.bios_hardcoded_edid = edid;
-							else
+								rdev->mode_info.bios_hardcoded_edid_size = edid_size;
+							} else
 								kfree(edid);
 						}
 					}
@@ -2176,24 +2185,27 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
 	}
 }
 
-static u16 radeon_atombios_get_default_vddc(struct radeon_device *rdev)
+static void radeon_atombios_get_default_voltages(struct radeon_device *rdev,
+						 u16 *vddc, u16 *vddci)
 {
 	struct radeon_mode_info *mode_info = &rdev->mode_info;
 	int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
 	u8 frev, crev;
 	u16 data_offset;
 	union firmware_info *firmware_info;
-	u16 vddc = 0;
+
+	*vddc = 0;
+	*vddci = 0;
 
 	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
 				   &frev, &crev, &data_offset)) {
 		firmware_info =
 			(union firmware_info *)(mode_info->atom_context->bios +
 						data_offset);
-		vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
+		*vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
+		if ((frev == 2) && (crev >= 2))
+			*vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
 	}
-
-	return vddc;
 }
 
 static void radeon_atombios_parse_pplib_non_clock_info(struct radeon_device *rdev,
@@ -2203,7 +2215,9 @@ static void radeon_atombios_parse_pplib_non_clock_info(struct radeon_device *rde
 	int j;
 	u32 misc = le32_to_cpu(non_clock_info->ulCapsAndSettings);
 	u32 misc2 = le16_to_cpu(non_clock_info->usClassification);
-	u16 vddc = radeon_atombios_get_default_vddc(rdev);
+	u16 vddc, vddci;
+
+	radeon_atombios_get_default_voltages(rdev, &vddc, &vddci);
 
 	rdev->pm.power_state[state_index].misc = misc;
 	rdev->pm.power_state[state_index].misc2 = misc2;
@@ -2244,6 +2258,7 @@ static void radeon_atombios_parse_pplib_non_clock_info(struct radeon_device *rde
 			rdev->pm.default_sclk = rdev->pm.power_state[state_index].clock_info[0].sclk;
 			rdev->pm.default_mclk = rdev->pm.power_state[state_index].clock_info[0].mclk;
 			rdev->pm.default_vddc = rdev->pm.power_state[state_index].clock_info[0].voltage.voltage;
+			rdev->pm.default_vddci = rdev->pm.power_state[state_index].clock_info[0].voltage.vddci;
 		} else {
 			/* patch the table values with the default slck/mclk from firmware info */
 			for (j = 0; j < mode_index; j++) {
@@ -2286,6 +2301,8 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev,
 			VOLTAGE_SW;
 		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage =
 			le16_to_cpu(clock_info->evergreen.usVDDC);
+		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.vddci =
+			le16_to_cpu(clock_info->evergreen.usVDDCI);
 	} else {
 		sclk = le16_to_cpu(clock_info->r600.usEngineClockLow);
 		sclk |= clock_info->r600.ucEngineClockHigh << 16;
@@ -2577,25 +2594,25 @@ union set_voltage {
 	struct _SET_VOLTAGE_PARAMETERS_V2 v2;
 };
 
-void radeon_atom_set_voltage(struct radeon_device *rdev, u16 level)
+void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type)
 {
 	union set_voltage args;
 	int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
-	u8 frev, crev, volt_index = level;
+	u8 frev, crev, volt_index = voltage_level;
 
 	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
 		return;
 
 	switch (crev) {
 	case 1:
-		args.v1.ucVoltageType = SET_VOLTAGE_TYPE_ASIC_VDDC;
+		args.v1.ucVoltageType = voltage_type;
 		args.v1.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_ALL_SOURCE;
 		args.v1.ucVoltageIndex = volt_index;
 		break;
 	case 2:
-		args.v2.ucVoltageType = SET_VOLTAGE_TYPE_ASIC_VDDC;
+		args.v2.ucVoltageType = voltage_type;
 		args.v2.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE;
-		args.v2.usVoltageLevel = cpu_to_le16(level);
+		args.v2.usVoltageLevel = cpu_to_le16(voltage_level);
 		break;
 	default:
 		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index ed5dfe58f29c..9d95792bea3e 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -15,6 +15,9 @@
 #define ATPX_VERSION 0
 #define ATPX_GPU_PWR 2
 #define ATPX_MUX_SELECT 3
+#define ATPX_I2C_MUX_SELECT 4
+#define ATPX_SWITCH_START 5
+#define ATPX_SWITCH_END 6
 
 #define ATPX_INTEGRATED 0
 #define ATPX_DISCRETE 1
@@ -149,13 +152,35 @@ static int radeon_atpx_switch_mux(acpi_handle handle, int mux_id)
 	return radeon_atpx_execute(handle, ATPX_MUX_SELECT, mux_id);
 }
 
+static int radeon_atpx_switch_i2c_mux(acpi_handle handle, int mux_id)
+{
+	return radeon_atpx_execute(handle, ATPX_I2C_MUX_SELECT, mux_id);
+}
+
+static int radeon_atpx_switch_start(acpi_handle handle, int gpu_id)
+{
+	return radeon_atpx_execute(handle, ATPX_SWITCH_START, gpu_id);
+}
+
+static int radeon_atpx_switch_end(acpi_handle handle, int gpu_id)
+{
+	return radeon_atpx_execute(handle, ATPX_SWITCH_END, gpu_id);
+}
 
 static int radeon_atpx_switchto(enum vga_switcheroo_client_id id)
 {
+	int gpu_id;
+
 	if (id == VGA_SWITCHEROO_IGD)
-		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 0);
+		gpu_id = ATPX_INTEGRATED;
 	else
-		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 1);
+		gpu_id = ATPX_DISCRETE;
+
+	radeon_atpx_switch_start(radeon_atpx_priv.atpx_handle, gpu_id);
+	radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, gpu_id);
+	radeon_atpx_switch_i2c_mux(radeon_atpx_priv.atpx_handle, gpu_id);
+	radeon_atpx_switch_end(radeon_atpx_priv.atpx_handle, gpu_id);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 2ef6d5135064..5f45fa12bb8b 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1199,7 +1199,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 	if (router->ddc_valid || router->cd_valid) {
 		radeon_connector->router_bus = radeon_i2c_lookup(rdev, &router->i2c_info);
 		if (!radeon_connector->router_bus)
-			goto failed;
+			DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
 	}
 	switch (connector_type) {
 	case DRM_MODE_CONNECTOR_VGA:
@@ -1208,7 +1208,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1226,7 +1226,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1249,7 +1249,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		subpixel_order = SubPixelHorizontalRGB;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1290,7 +1290,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		drm_connector_attach_property(&radeon_connector->base,
 					      rdev->mode_info.coherent_mode_property,
@@ -1329,10 +1329,10 @@ radeon_add_atom_connector(struct drm_device *dev,
 			else
 				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "DP-auxch");
 			if (!radeon_dig_connector->dp_i2c_bus)
-				goto failed;
+				DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n");
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		subpixel_order = SubPixelHorizontalRGB;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1381,7 +1381,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		drm_connector_attach_property(&radeon_connector->base,
 					      dev->mode_config.scaling_mode_property,
@@ -1457,7 +1457,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1475,7 +1475,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		radeon_connector->dac_load_detect = true;
 		drm_connector_attach_property(&radeon_connector->base,
@@ -1493,7 +1493,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 			radeon_connector->dac_load_detect = true;
@@ -1538,7 +1538,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
 		if (i2c_bus->valid) {
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				goto failed;
+				DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
 		drm_connector_attach_property(&radeon_connector->base,
 					      dev->mode_config.scaling_mode_property,
@@ -1567,9 +1567,4 @@ radeon_add_legacy_connector(struct drm_device *dev,
 				radeon_legacy_backlight_init(radeon_encoder, connector);
 		}
 	}
-	return;
-
-failed:
-	drm_connector_cleanup(connector);
-	kfree(connector);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index bdf2fa1189ae..3189a7efb2e9 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -167,9 +167,6 @@ int radeon_crtc_cursor_set(struct drm_crtc *crtc,
 		return -EINVAL;
 	}
 
-	radeon_crtc->cursor_width = width;
-	radeon_crtc->cursor_height = height;
-
 	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
 	if (!obj) {
 		DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
@@ -180,6 +177,9 @@ int radeon_crtc_cursor_set(struct drm_crtc *crtc,
 	if (ret)
 		goto fail;
 
+	radeon_crtc->cursor_width = width;
+	radeon_crtc->cursor_height = height;
+
 	radeon_lock_cursor(crtc, true);
 	/* XXX only 27 bit offset for legacy cursor */
 	radeon_set_cursor(crtc, obj, gpu_addr);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 9e59868d354e..bbcd1dd7bac0 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -79,7 +79,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev)
 			scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
 		else
 			scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base;
-		seq = rdev->wb.wb[scratch_index/4];
+		seq = le32_to_cpu(rdev->wb.wb[scratch_index/4]);
 	} else
 		seq = RREG32(rdev->fence_drv.scratch_reg);
 	if (seq != rdev->fence_drv.last_seq) {
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index f0534ef2f331..a533f52fd163 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -181,9 +181,9 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 
 	for (i = 0; i < pages; i++, p++) {
-		/* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
-		 * is requested. */
-		if (dma_addr[i] != DMA_ERROR_CODE) {
+		/* we reverted the patch using dma_addr in TTM for now but this
+		 * code stops building on alpha so just comment it out for now */
+		if (0) { /*dma_addr[i] != DMA_ERROR_CODE) */
 			rdev->gart.ttm_alloced[p] = true;
 			rdev->gart.pages_addr[p] = dma_addr[i];
 		} else {
@@ -285,4 +285,6 @@ void radeon_gart_fini(struct radeon_device *rdev)
 	rdev->gart.pages = NULL;
 	rdev->gart.pages_addr = NULL;
 	rdev->gart.ttm_alloced = NULL;
+
+	radeon_dummy_page_fini(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index ded2a45bc95c..983cbac75af0 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -1062,7 +1062,7 @@ void radeon_i2c_get_byte(struct radeon_i2c_chan *i2c_bus,
 		*val = in_buf[0];
 		DRM_DEBUG("val = 0x%02x\n", *val);
 	} else {
-		DRM_ERROR("i2c 0x%02x 0x%02x read failed\n",
+		DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
 			  addr, *val);
 	}
 }
@@ -1084,7 +1084,7 @@ void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c_bus,
 	out_buf[1] = val;
 
 	if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
-		DRM_ERROR("i2c 0x%02x 0x%02x write failed\n",
+		DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
 			  addr, val);
 }
 
@@ -1096,6 +1096,9 @@ void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector)
 	if (!radeon_connector->router.ddc_valid)
 		return;
 
+	if (!radeon_connector->router_bus)
+		return;
+
 	radeon_i2c_get_byte(radeon_connector->router_bus,
 			    radeon_connector->router.i2c_addr,
 			    0x3, &val);
@@ -1121,6 +1124,9 @@ void radeon_router_select_cd_port(struct radeon_connector *radeon_connector)
 	if (!radeon_connector->router.cd_valid)
 		return;
 
+	if (!radeon_connector->router_bus)
+		return;
+
 	radeon_i2c_get_byte(radeon_connector->router_bus,
 			    radeon_connector->router.i2c_addr,
 			    0x3, &val);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index bf7d4c061451..bd58af658581 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -221,6 +221,22 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			return -EINVAL;
 		}
 		break;
+	case RADEON_INFO_NUM_TILE_PIPES:
+		if (rdev->family >= CHIP_CAYMAN)
+			value = rdev->config.cayman.max_tile_pipes;
+		else if (rdev->family >= CHIP_CEDAR)
+			value = rdev->config.evergreen.max_tile_pipes;
+		else if (rdev->family >= CHIP_RV770)
+			value = rdev->config.rv770.max_tile_pipes;
+		else if (rdev->family >= CHIP_R600)
+			value = rdev->config.r600.max_tile_pipes;
+		else {
+			return -EINVAL;
+		}
+		break;
+	case RADEON_INFO_FUSION_GART_WORKING:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 5b54268ed6b2..2f46e0c8df53 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -269,7 +269,7 @@ static const struct drm_encoder_helper_funcs radeon_legacy_lvds_helper_funcs = {
 	.disable = radeon_legacy_encoder_disable,
 };
 
-#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 
 #define MAX_RADEON_LEVEL 0xFF
 
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 08de669e025a..86eda1ea94df 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -23,6 +23,7 @@
 #include "drmP.h"
 #include "radeon.h"
 #include "avivod.h"
+#include "atom.h"
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
 #endif
@@ -535,7 +536,11 @@ void radeon_pm_resume(struct radeon_device *rdev)
 	/* set up the default clocks if the MC ucode is loaded */
 	if (ASIC_IS_DCE5(rdev) && rdev->mc_fw) {
 		if (rdev->pm.default_vddc)
-			radeon_atom_set_voltage(rdev, rdev->pm.default_vddc);
+			radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
+						SET_VOLTAGE_TYPE_ASIC_VDDC);
+		if (rdev->pm.default_vddci)
+			radeon_atom_set_voltage(rdev, rdev->pm.default_vddci,
+						SET_VOLTAGE_TYPE_ASIC_VDDCI);
 		if (rdev->pm.default_sclk)
 			radeon_set_engine_clock(rdev, rdev->pm.default_sclk);
 		if (rdev->pm.default_mclk)
@@ -548,6 +553,7 @@ void radeon_pm_resume(struct radeon_device *rdev)
 	rdev->pm.current_sclk = rdev->pm.default_sclk;
 	rdev->pm.current_mclk = rdev->pm.default_mclk;
 	rdev->pm.current_vddc = rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.voltage;
+	rdev->pm.current_vddci = rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.vddci;
 	if (rdev->pm.pm_method == PM_METHOD_DYNPM
 	    && rdev->pm.dynpm_state == DYNPM_STATE_SUSPENDED) {
 		rdev->pm.dynpm_state = DYNPM_STATE_ACTIVE;
@@ -585,7 +591,8 @@ int radeon_pm_init(struct radeon_device *rdev)
 		/* set up the default clocks if the MC ucode is loaded */
 		if (ASIC_IS_DCE5(rdev) && rdev->mc_fw) {
 			if (rdev->pm.default_vddc)
-				radeon_atom_set_voltage(rdev, rdev->pm.default_vddc);
+				radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
+							SET_VOLTAGE_TYPE_ASIC_VDDC);
 			if (rdev->pm.default_sclk)
 				radeon_set_engine_clock(rdev, rdev->pm.default_sclk);
 			if (rdev->pm.default_mclk)
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index bbc9cd823334..c6776e48fdde 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -248,7 +248,7 @@ void radeon_ib_pool_fini(struct radeon_device *rdev)
 void radeon_ring_free_size(struct radeon_device *rdev)
 {
 	if (rdev->wb.enabled)
-		rdev->cp.rptr = rdev->wb.wb[RADEON_WB_CP_RPTR_OFFSET/4];
+		rdev->cp.rptr = le32_to_cpu(rdev->wb.wb[RADEON_WB_CP_RPTR_OFFSET/4]);
 	else {
 		if (rdev->family >= CHIP_R600)
 			rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
diff --git a/drivers/gpu/drm/radeon/reg_srcs/cayman b/drivers/gpu/drm/radeon/reg_srcs/cayman
index 6334f8ac1209..0aa8e85a9457 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/cayman
+++ b/drivers/gpu/drm/radeon/reg_srcs/cayman
@@ -33,6 +33,7 @@ cayman 0x9400
 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS
 0x00009100 SPI_CONFIG_CNTL
 0x0000913C SPI_CONFIG_CNTL_1
+0x00009508 TA_CNTL_AUX
 0x00009830 DB_DEBUG
 0x00009834 DB_DEBUG2
 0x00009838 DB_DEBUG3
diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen
index 7e1637176e08..0e28cae7ea43 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/evergreen
+++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen
@@ -46,6 +46,7 @@ evergreen 0x9400
 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS
 0x00009100 SPI_CONFIG_CNTL
 0x0000913C SPI_CONFIG_CNTL_1
+0x00009508 TA_CNTL_AUX
 0x00009700 VC_CNTL
 0x00009714 VC_ENHANCE
 0x00009830 DB_DEBUG
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index af0da4ae3f55..92f1900dc7ca 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -708,6 +708,7 @@ r600 0x9400
 0x00028D0C DB_RENDER_CONTROL
 0x00028D10 DB_RENDER_OVERRIDE
 0x0002880C DB_SHADER_CONTROL
+0x00028D28 DB_SRESULTS_COMPARE_STATE0
 0x00028D2C DB_SRESULTS_COMPARE_STATE1
 0x00028430 DB_STENCILREFMASK
 0x00028434 DB_STENCILREFMASK_BF
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 876cebc4b8ba..6e3b11e5abbe 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -114,7 +114,7 @@ void rs600_pm_misc(struct radeon_device *rdev)
 				udelay(voltage->delay);
 		}
 	} else if (voltage->type == VOLTAGE_VDDC)
-		radeon_atom_set_voltage(rdev, voltage->vddc_id);
+		radeon_atom_set_voltage(rdev, voltage->vddc_id, SET_VOLTAGE_TYPE_ASIC_VDDC);
 
 	dyn_pwrmgt_sclk_length = RREG32_PLL(DYN_PWRMGT_SCLK_LENGTH);
 	dyn_pwrmgt_sclk_length &= ~REDUCED_POWER_SCLK_HILEN(0xf);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b974ac7df8df..ef8a5babe9f7 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -106,7 +106,7 @@ void rv770_pm_misc(struct radeon_device *rdev)
 
 	if ((voltage->type == VOLTAGE_SW) && voltage->voltage) {
 		if (voltage->voltage != rdev->pm.current_vddc) {
-			radeon_atom_set_voltage(rdev, voltage->voltage);
+			radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC);
 			rdev->pm.current_vddc = voltage->voltage;
 			DRM_DEBUG("Setting: v: %d\n", voltage->voltage);
 		}
@@ -1255,9 +1255,6 @@ int rv770_init(struct radeon_device *rdev)
 {
 	int r;
 
-	r = radeon_dummy_page_init(rdev);
-	if (r)
-		return r;
 	/* This don't do much */
 	r = radeon_gem_init(rdev);
 	if (r)
@@ -1372,7 +1369,6 @@ void rv770_fini(struct radeon_device *rdev)
 	radeon_atombios_fini(rdev);
 	kfree(rdev->bios);
 	rdev->bios = NULL;
-	radeon_dummy_page_fini(rdev);
 }
 
 static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 737a2a2e46a5..9d9d92945f8c 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -683,22 +683,14 @@ int ttm_get_pages(struct list_head *pages, int flags,
 			gfp_flags |= GFP_HIGHUSER;
 
 		for (r = 0; r < count; ++r) {
-			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
-				void *addr;
-				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
-							  &dma_address[r],
-							  gfp_flags);
-				if (addr == NULL)
-					return -ENOMEM;
-				p = virt_to_page(addr);
-			} else
-				p = alloc_page(gfp_flags);
+			p = alloc_page(gfp_flags);
 			if (!p) {
 
 				printk(KERN_ERR TTM_PFX
 				       "Unable to allocate page.");
 				return -ENOMEM;
 			}
+
 			list_add(&p->lru, pages);
 		}
 		return 0;
@@ -746,24 +738,12 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
 	unsigned long irq_flags;
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 	struct page *p, *tmp;
-	unsigned r;
 
 	if (pool == NULL) {
 		/* No pool for this memory type so free the pages */
 
-		r = page_count-1;
 		list_for_each_entry_safe(p, tmp, pages, lru) {
-			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
-				void *addr = page_address(p);
-				WARN_ON(!addr || !dma_address[r]);
-				if (addr)
-					dma_free_coherent(NULL, PAGE_SIZE,
-							  addr,
-							  dma_address[r]);
-				dma_address[r] = 0;
-			} else
-				__free_page(p);
-			r--;
+			__free_page(p);
 		}
 		/* Make the pages list empty */
 		INIT_LIST_HEAD(pages);
diff --git a/drivers/gpu/stub/Kconfig b/drivers/gpu/stub/Kconfig
index 70e60a4bb678..419917955bf6 100644
--- a/drivers/gpu/stub/Kconfig
+++ b/drivers/gpu/stub/Kconfig
@@ -5,6 +5,7 @@ config STUB_POULSBO
 	# Poulsbo stub depends on ACPI_VIDEO when ACPI is enabled
 	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
 	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select VIDEO_OUTPUT_CONTROL if ACPI
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select THERMAL if ACPI
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index e01cacba685f..498b284e5ef9 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -219,9 +219,6 @@ static int vga_switchto_stage1(struct vga_switcheroo_client *new_client)
 	int i;
 	struct vga_switcheroo_client *active = NULL;
 
-	if (new_client->active == true)
-		return 0;
-
 	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
 		if (vgasr_priv.clients[i].active == true) {
 			active = &vgasr_priv.clients[i];
@@ -372,6 +369,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
 		goto out;
 	}
 
+	if (client->active == true)
+		goto out;
+
 	/* okay we want a switch - test if devices are willing to switch */
 	can_switch = true;
 	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 060ef6327876..50e40dbd8bb6 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -110,8 +110,7 @@ config SENSORS_ADM1021
 	help
 	  If you say yes here you get support for Analog Devices ADM1021
 	  and ADM1023 sensor chips and clones: Maxim MAX1617 and MAX1617A,
-	  Genesys Logic GL523SM, National Semiconductor LM84, TI THMC10,
-	  and the XEON processor built-in sensor.
+	  Genesys Logic GL523SM, National Semiconductor LM84 and TI THMC10.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called adm1021.
@@ -618,10 +617,10 @@ config SENSORS_LM90
 	depends on I2C
 	help
 	  If you say yes here you get support for National Semiconductor LM90,
-	  LM86, LM89 and LM99, Analog Devices ADM1032 and ADT7461, Maxim
-	  MAX6646, MAX6647, MAX6648, MAX6649, MAX6657, MAX6658, MAX6659,
-	  MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, and Winbond/Nuvoton
-	  W83L771W/G/AWG/ASG sensor chips.
+	  LM86, LM89 and LM99, Analog Devices ADM1032, ADT7461, and ADT7461A,
+	  Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6657, MAX6658, MAX6659,
+	  MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, ON Semiconductor NCT1008,
+	  and Winbond/Nuvoton W83L771W/G/AWG/ASG sensor chips.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called lm90.
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 250d099ca398..da72dc12068c 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -1094,6 +1094,7 @@ static struct attribute *lm85_attributes_minctl[] = {
 	&sensor_dev_attr_pwm1_auto_pwm_minctl.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_pwm_minctl.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_pwm_minctl.dev_attr.attr,
+	NULL
 };
 
 static const struct attribute_group lm85_group_minctl = {
@@ -1104,6 +1105,7 @@ static struct attribute *lm85_attributes_temp_off[] = {
 	&sensor_dev_attr_temp1_auto_temp_off.dev_attr.attr,
 	&sensor_dev_attr_temp2_auto_temp_off.dev_attr.attr,
 	&sensor_dev_attr_temp3_auto_temp_off.dev_attr.attr,
+	NULL
 };
 
 static const struct attribute_group lm85_group_temp_off = {
@@ -1329,11 +1331,11 @@ static int lm85_probe(struct i2c_client *client,
 	if (data->type != emc6d103s) {
 		err = sysfs_create_group(&client->dev.kobj, &lm85_group_minctl);
 		if (err)
-			goto err_kfree;
+			goto err_remove_files;
 		err = sysfs_create_group(&client->dev.kobj,
 					 &lm85_group_temp_off);
 		if (err)
-			goto err_kfree;
+			goto err_remove_files;
 	}
 
 	/* The ADT7463/68 have an optional VRM 10 mode where pin 21 is used
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index c43b4e9f96a9..2f94f9504804 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -49,10 +49,10 @@
  * chips, but support three temperature sensors instead of two. MAX6695
  * and MAX6696 only differ in the pinout so they can be treated identically.
  *
- * This driver also supports the ADT7461 chip from Analog Devices.
- * It's supported in both compatibility and extended mode. It is mostly
- * compatible with LM90 except for a data format difference for the
- * temperature value registers.
+ * This driver also supports ADT7461 and ADT7461A from Analog Devices as well as
+ * NCT1008 from ON Semiconductor. The chips are supported in both compatibility
+ * and extended mode. They are mostly compatible with LM90 except for a data
+ * format difference for the temperature value registers.
  *
  * Since the LM90 was the first chipset supported by this driver, most
  * comments will refer to this chipset, but are actually general and
@@ -88,9 +88,10 @@
  * Addresses to scan
  * Address is fully defined internally and cannot be changed except for
  * MAX6659, MAX6680 and MAX6681.
- * LM86, LM89, LM90, LM99, ADM1032, ADM1032-1, ADT7461, MAX6649, MAX6657,
- * MAX6658 and W83L771 have address 0x4c.
- * ADM1032-2, ADT7461-2, LM89-1, LM99-1 and MAX6646 have address 0x4d.
+ * LM86, LM89, LM90, LM99, ADM1032, ADM1032-1, ADT7461, ADT7461A, MAX6649,
+ * MAX6657, MAX6658, NCT1008 and W83L771 have address 0x4c.
+ * ADM1032-2, ADT7461-2, ADT7461A-2, LM89-1, LM99-1, MAX6646, and NCT1008D
+ * have address 0x4d.
  * MAX6647 has address 0x4e.
  * MAX6659 can have address 0x4c, 0x4d or 0x4e.
  * MAX6680 and MAX6681 can have address 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
@@ -174,6 +175,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680,
 static const struct i2c_device_id lm90_id[] = {
 	{ "adm1032", adm1032 },
 	{ "adt7461", adt7461 },
+	{ "adt7461a", adt7461 },
 	{ "lm90", lm90 },
 	{ "lm86", lm86 },
 	{ "lm89", lm86 },
@@ -188,6 +190,7 @@ static const struct i2c_device_id lm90_id[] = {
 	{ "max6681", max6680 },
 	{ "max6695", max6696 },
 	{ "max6696", max6696 },
+	{ "nct1008", adt7461 },
 	{ "w83l771", w83l771 },
 	{ }
 };
@@ -1153,6 +1156,11 @@ static int lm90_detect(struct i2c_client *new_client,
 		 && (reg_config1 & 0x1B) == 0x00
 		 && reg_convrate <= 0x0A) {
 			name = "adt7461";
+		} else
+		if (chip_id == 0x57 /* ADT7461A, NCT1008 */
+		 && (reg_config1 & 0x1B) == 0x00
+		 && reg_convrate <= 0x0A) {
+			name = "adt7461a";
 		}
 	} else
 	if (man_id == 0x4D) { /* Maxim */
diff --git a/drivers/hwmon/pmbus_core.c b/drivers/hwmon/pmbus_core.c
index edfb92e41735..196ffafafd88 100644
--- a/drivers/hwmon/pmbus_core.c
+++ b/drivers/hwmon/pmbus_core.c
@@ -139,7 +139,6 @@ struct pmbus_data {
 	 * A single status register covers multiple attributes,
 	 * so we keep them all together.
 	 */
-	u8 status_bits;
 	u8 status[PB_NUM_STATUS_REG];
 
 	u8 currpage;
diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c
index de5819199e2e..57240740b161 100644
--- a/drivers/hwmon/twl4030-madc-hwmon.c
+++ b/drivers/hwmon/twl4030-madc-hwmon.c
@@ -98,7 +98,6 @@ static const struct attribute_group twl4030_madc_group = {
 static int __devinit twl4030_madc_hwmon_probe(struct platform_device *pdev)
 {
 	int ret;
-	int status;
 	struct device *hwmon;
 
 	ret = sysfs_create_group(&pdev->dev.kobj, &twl4030_madc_group);
@@ -107,7 +106,7 @@ static int __devinit twl4030_madc_hwmon_probe(struct platform_device *pdev)
 	hwmon = hwmon_device_register(&pdev->dev);
 	if (IS_ERR(hwmon)) {
 		dev_err(&pdev->dev, "hwmon_device_register failed.\n");
-		status = PTR_ERR(hwmon);
+		ret = PTR_ERR(hwmon);
 		goto err_reg;
 	}
 
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index 38319a69bd0a..d6d58684712b 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -232,9 +232,17 @@ static int i2c_inb(struct i2c_adapter *i2c_adap)
  * Sanity check for the adapter hardware - check the reaction of
  * the bus lines only if it seems to be idle.
  */
-static int test_bus(struct i2c_algo_bit_data *adap, char *name)
+static int test_bus(struct i2c_adapter *i2c_adap)
 {
-	int scl, sda;
+	struct i2c_algo_bit_data *adap = i2c_adap->algo_data;
+	const char *name = i2c_adap->name;
+	int scl, sda, ret;
+
+	if (adap->pre_xfer) {
+		ret = adap->pre_xfer(i2c_adap);
+		if (ret < 0)
+			return -ENODEV;
+	}
 
 	if (adap->getscl == NULL)
 		pr_info("%s: Testing SDA only, SCL is not readable\n", name);
@@ -297,11 +305,19 @@ static int test_bus(struct i2c_algo_bit_data *adap, char *name)
 		       "while pulling SCL high!\n", name);
 		goto bailout;
 	}
+
+	if (adap->post_xfer)
+		adap->post_xfer(i2c_adap);
+
 	pr_info("%s: Test OK\n", name);
 	return 0;
 bailout:
 	sdahi(adap);
 	sclhi(adap);
+
+	if (adap->post_xfer)
+		adap->post_xfer(i2c_adap);
+
 	return -ENODEV;
 }
 
@@ -607,7 +623,7 @@ static int __i2c_bit_add_bus(struct i2c_adapter *adap,
 	int ret;
 
 	if (bit_test) {
-		ret = test_bus(bit_adap, adap->name);
+		ret = test_bus(adap);
 		if (ret < 0)
 			return -ENODEV;
 	}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 72c0415f6f94..455e909bc768 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -134,10 +134,15 @@
 				 SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \
 				 SMBHSTSTS_INTR)
 
+/* Older devices have their ID defined in <linux/pci_ids.h> */
+#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS	0x1c22
+#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS	0x1d22
 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0	0x1d70
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1	0x1d71
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2	0x1d72
+#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS	0x2330
+#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS	0x3b30
 
 struct i801_priv {
 	struct i2c_adapter adapter;
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index 75b984c519ac..107397a606b4 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = {
 	.timeout = HZ,
 };
 
+static const struct of_device_id mpc_i2c_of_match[];
 static int __devinit fsl_i2c_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct mpc_i2c *i2c;
 	const u32 *prop;
 	u32 clock = MPC_I2C_CLOCK_LEGACY;
 	int result = 0;
 	int plen;
 
-	if (!op->dev.of_match)
+	match = of_match_device(mpc_i2c_of_match, &op->dev);
+	if (!match)
 		return -EINVAL;
 
 	i2c = kzalloc(sizeof(*i2c), GFP_KERNEL);
@@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op)
 			clock = *prop;
 	}
 
-	if (op->dev.of_match->data) {
-		struct mpc_i2c_data *data = op->dev.of_match->data;
+	if (match->data) {
+		struct mpc_i2c_data *data = match->data;
 		data->setup(op->dev.of_node, i2c, clock, data->prescaler);
 	} else {
 		/* Backwards compatibility */
diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c
index 0eb1515541e7..2dbba163b102 100644
--- a/drivers/i2c/busses/i2c-parport.c
+++ b/drivers/i2c/busses/i2c-parport.c
@@ -1,7 +1,7 @@
 /* ------------------------------------------------------------------------ *
  * i2c-parport.c I2C bus over parallel port                                 *
  * ------------------------------------------------------------------------ *
-   Copyright (C) 2003-2010 Jean Delvare <khali@linux-fr.org>
+   Copyright (C) 2003-2011 Jean Delvare <khali@linux-fr.org>
    
    Based on older i2c-philips-par.c driver
    Copyright (C) 1995-2000 Simon G. Vogl
@@ -33,6 +33,8 @@
 #include <linux/i2c-algo-bit.h>
 #include <linux/i2c-smbus.h>
 #include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
 #include "i2c-parport.h"
 
 /* ----- Device list ------------------------------------------------------ */
@@ -43,10 +45,11 @@ struct i2c_par {
 	struct i2c_algo_bit_data algo_data;
 	struct i2c_smbus_alert_setup alert_data;
 	struct i2c_client *ara;
-	struct i2c_par *next;
+	struct list_head node;
 };
 
-static struct i2c_par *adapter_list;
+static LIST_HEAD(adapter_list);
+static DEFINE_MUTEX(adapter_list_lock);
 
 /* ----- Low-level parallel port access ----------------------------------- */
 
@@ -228,8 +231,9 @@ static void i2c_parport_attach (struct parport *port)
 	}
 
 	/* Add the new adapter to the list */
-	adapter->next = adapter_list;
-	adapter_list = adapter;
+	mutex_lock(&adapter_list_lock);
+	list_add_tail(&adapter->node, &adapter_list);
+	mutex_unlock(&adapter_list_lock);
         return;
 
 ERROR1:
@@ -241,11 +245,11 @@ ERROR0:
 
 static void i2c_parport_detach (struct parport *port)
 {
-	struct i2c_par *adapter, *prev;
+	struct i2c_par *adapter, *_n;
 
 	/* Walk the list */
-	for (prev = NULL, adapter = adapter_list; adapter;
-	     prev = adapter, adapter = adapter->next) {
+	mutex_lock(&adapter_list_lock);
+	list_for_each_entry_safe(adapter, _n, &adapter_list, node) {
 		if (adapter->pdev->port == port) {
 			if (adapter->ara) {
 				parport_disable_irq(port);
@@ -259,14 +263,11 @@ static void i2c_parport_detach (struct parport *port)
 				
 			parport_release(adapter->pdev);
 			parport_unregister_device(adapter->pdev);
-			if (prev)
-				prev->next = adapter->next;
-			else
-				adapter_list = adapter->next;
+			list_del(&adapter->node);
 			kfree(adapter);
-			return;
 		}
 	}
+	mutex_unlock(&adapter_list_lock);
 }
 
 static struct parport_driver i2c_parport_driver = {
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index a97e3fec8148..04be9f82e14b 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -65,7 +65,7 @@ static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
 		jiffies, expires);
 
 	timer->expires = jiffies + expires;
-	timer->data = (unsigned long)&alg_data;
+	timer->data = (unsigned long)alg_data;
 
 	add_timer(timer);
 }
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 70c30e6bce0b..9a58994ff7ea 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -797,7 +797,8 @@ static int i2c_do_add_adapter(struct i2c_driver *driver,
 
 	/* Let legacy drivers scan this bus for matching devices */
 	if (driver->attach_adapter) {
-		dev_warn(&adap->dev, "attach_adapter method is deprecated\n");
+		dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n",
+			 driver->driver.name);
 		dev_warn(&adap->dev, "Please use another way to instantiate "
 			 "your i2c_client\n");
 		/* We ignore the return code; if it fails, too bad */
@@ -984,7 +985,8 @@ static int i2c_do_del_adapter(struct i2c_driver *driver,
 
 	if (!driver->detach_adapter)
 		return 0;
-	dev_warn(&adapter->dev, "detach_adapter method is deprecated\n");
+	dev_warn(&adapter->dev, "%s: detach_adapter method is deprecated\n",
+		 driver->driver.name);
 	res = driver->detach_adapter(adapter);
 	if (res)
 		dev_err(&adapter->dev, "detach_adapter failed (%d) "
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index fd1e11799137..a5ec5a7cb381 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive)
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
 	g->flags |= GENHD_FL_REMOVABLE;
-	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 2a6bc50e8a41..02caa7dd51c8 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -79,6 +79,12 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 	return CDS_DRIVE_NOT_READY;
 }
 
+/*
+ * ide-cd always generates a media changed event if media is missing,
+ * which makes it impossible to use for proper event reporting, so
+ * disk->events is cleared to 0 and the following function is used only
+ * to trigger revalidation; its result is never propagated to userland.
+ */
 unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi,
 					 unsigned int clearing, int slot_nr)
 {
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index c4ffd4888939..70ea8763567d 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -298,6 +298,12 @@ static unsigned int ide_gd_check_events(struct gendisk *disk,
 		return 0;
 	}
 
+	/*
+	 * The following is used to force revalidation on the first open on
+	 * removable devices, and never gets reported to userland as
+	 * genhd->events is 0.  This is intentional, as removable IDE disks
+	 * can't really detect MEDIA_CHANGE events.
+	 */
 	ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED;
 	drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
 
@@ -413,7 +419,6 @@ static int ide_gd_probe(ide_drive_t *drive)
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
 	g->fops = &ide_gd_ops;
-	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5ed9d25d021a..99dde874fbbd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -148,6 +148,7 @@ struct rdma_id_private {
 	u32			qp_num;
 	u8			srq;
 	u8			tos;
+	u8			reuseaddr;
 };
 
 struct cma_multicast {
@@ -712,6 +713,21 @@ static inline int cma_any_addr(struct sockaddr *addr)
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
 
+static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
+{
+	if (src->sa_family != dst->sa_family)
+		return -1;
+
+	switch (src->sa_family) {
+	case AF_INET:
+		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
+		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+	default:
+		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
+				     &((struct sockaddr_in6 *) dst)->sin6_addr);
+	}
+}
+
 static inline __be16 cma_port(struct sockaddr *addr)
 {
 	if (addr->sa_family == AF_INET)
@@ -1564,50 +1580,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
-int rdma_listen(struct rdma_cm_id *id, int backlog)
-{
-	struct rdma_id_private *id_priv;
-	int ret;
-
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == CMA_IDLE) {
-		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
-		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
-		if (ret)
-			return ret;
-	}
-
-	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
-		return -EINVAL;
-
-	id_priv->backlog = backlog;
-	if (id->device) {
-		switch (rdma_node_get_transport(id->device->node_type)) {
-		case RDMA_TRANSPORT_IB:
-			ret = cma_ib_listen(id_priv);
-			if (ret)
-				goto err;
-			break;
-		case RDMA_TRANSPORT_IWARP:
-			ret = cma_iw_listen(id_priv, backlog);
-			if (ret)
-				goto err;
-			break;
-		default:
-			ret = -ENOSYS;
-			goto err;
-		}
-	} else
-		cma_listen_on_all(id_priv);
-
-	return 0;
-err:
-	id_priv->backlog = 0;
-	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
-	return ret;
-}
-EXPORT_SYMBOL(rdma_listen);
-
 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
 {
 	struct rdma_id_private *id_priv;
@@ -2090,6 +2062,25 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_addr);
 
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
+{
+	struct rdma_id_private *id_priv;
+	unsigned long flags;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	spin_lock_irqsave(&id_priv->lock, flags);
+	if (id_priv->state == CMA_IDLE) {
+		id_priv->reuseaddr = reuse;
+		ret = 0;
+	} else {
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_set_reuseaddr);
+
 static void cma_bind_port(struct rdma_bind_list *bind_list,
 			  struct rdma_id_private *id_priv)
 {
@@ -2165,41 +2156,71 @@ retry:
 	return -EADDRNOTAVAIL;
 }
 
-static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+/*
+ * Check that the requested port is available.  This is called when trying to
+ * bind to a specific port, or when trying to listen on a bound port.  In
+ * the latter case, the provided id_priv may already be on the bind_list, but
+ * we still need to check that it's okay to start listening.
+ */
+static int cma_check_port(struct rdma_bind_list *bind_list,
+			  struct rdma_id_private *id_priv, uint8_t reuseaddr)
 {
 	struct rdma_id_private *cur_id;
-	struct sockaddr_in *sin, *cur_sin;
-	struct rdma_bind_list *bind_list;
+	struct sockaddr *addr, *cur_addr;
 	struct hlist_node *node;
+
+	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+	if (cma_any_addr(addr) && !reuseaddr)
+		return -EADDRNOTAVAIL;
+
+	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+		if (id_priv == cur_id)
+			continue;
+
+		if ((cur_id->state == CMA_LISTEN) ||
+		    !reuseaddr || !cur_id->reuseaddr) {
+			cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+			if (cma_any_addr(cur_addr))
+				return -EADDRNOTAVAIL;
+
+			if (!cma_addr_cmp(addr, cur_addr))
+				return -EADDRINUSE;
+		}
+	}
+	return 0;
+}
+
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+	struct rdma_bind_list *bind_list;
 	unsigned short snum;
+	int ret;
 
-	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
-	snum = ntohs(sin->sin_port);
+	snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
 	bind_list = idr_find(ps, snum);
-	if (!bind_list)
-		return cma_alloc_port(ps, id_priv, snum);
-
-	/*
-	 * We don't support binding to any address if anyone is bound to
-	 * a specific address on the same port.
-	 */
-	if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
-		return -EADDRNOTAVAIL;
-
-	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
-		if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
-			return -EADDRNOTAVAIL;
-
-		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
-		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
-			return -EADDRINUSE;
+	if (!bind_list) {
+		ret = cma_alloc_port(ps, id_priv, snum);
+	} else {
+		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
+		if (!ret)
+			cma_bind_port(bind_list, id_priv);
 	}
+	return ret;
+}
 
-	cma_bind_port(bind_list, id_priv);
-	return 0;
+static int cma_bind_listen(struct rdma_id_private *id_priv)
+{
+	struct rdma_bind_list *bind_list = id_priv->bind_list;
+	int ret = 0;
+
+	mutex_lock(&lock);
+	if (bind_list->owners.first->next)
+		ret = cma_check_port(bind_list, id_priv, 0);
+	mutex_unlock(&lock);
+	return ret;
 }
 
 static int cma_get_port(struct rdma_id_private *id_priv)
@@ -2253,6 +2274,56 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
 	return 0;
 }
 
+int rdma_listen(struct rdma_cm_id *id, int backlog)
+{
+	struct rdma_id_private *id_priv;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (id_priv->state == CMA_IDLE) {
+		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
+		if (ret)
+			return ret;
+	}
+
+	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
+		return -EINVAL;
+
+	if (id_priv->reuseaddr) {
+		ret = cma_bind_listen(id_priv);
+		if (ret)
+			goto err;
+	}
+
+	id_priv->backlog = backlog;
+	if (id->device) {
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			ret = cma_ib_listen(id_priv);
+			if (ret)
+				goto err;
+			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
+		default:
+			ret = -ENOSYS;
+			goto err;
+		}
+	} else
+		cma_listen_on_all(id_priv);
+
+	return 0;
+err:
+	id_priv->backlog = 0;
+	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_listen);
+
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
 	struct rdma_id_private *id_priv;
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 2a1e9ae134b4..a9c042345c6f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -725,7 +725,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
 	 */
 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
-	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+	if (iw_event->status == 0) {
 		cm_id_priv->id.local_addr = iw_event->local_addr;
 		cm_id_priv->id.remote_addr = iw_event->remote_addr;
 		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ec1e9da1488b..b3fa798525b2 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -883,6 +883,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
 		}
 		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
 		break;
+	case RDMA_OPTION_ID_REUSEADDR:
+		if (optlen != sizeof(int)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 6aa53cd69478..f660cd04ec2f 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1199,9 +1199,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	}
 	PDBG("%s ep %p status %d error %d\n", __func__, ep,
 	     rpl->status, status2errno(rpl->status));
-	ep->com.wr_wait.ret = status2errno(rpl->status);
-	ep->com.wr_wait.done = 1;
-	wake_up(&ep->com.wr_wait.wait);
+	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
 
 	return 0;
 }
@@ -1235,9 +1233,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
 
 	PDBG("%s ep %p\n", __func__, ep);
-	ep->com.wr_wait.ret = status2errno(rpl->status);
-	ep->com.wr_wait.done = 1;
-	wake_up(&ep->com.wr_wait.wait);
+	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
 	return 0;
 }
 
@@ -1467,7 +1463,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 	struct c4iw_qp_attributes attrs;
 	int disconnect = 1;
 	int release = 0;
-	int closing = 0;
+	int abort = 0;
 	struct tid_info *t = dev->rdev.lldi.tids;
 	unsigned int tid = GET_TID(hdr);
 
@@ -1493,23 +1489,22 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 		 * in rdma connection migration (see c4iw_accept_cr()).
 		 */
 		__state_set(&ep->com, CLOSING);
-		ep->com.wr_wait.done = 1;
-		ep->com.wr_wait.ret = -ECONNRESET;
 		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
-		wake_up(&ep->com.wr_wait.wait);
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 		break;
 	case MPA_REP_SENT:
 		__state_set(&ep->com, CLOSING);
-		ep->com.wr_wait.done = 1;
-		ep->com.wr_wait.ret = -ECONNRESET;
 		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
-		wake_up(&ep->com.wr_wait.wait);
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 		break;
 	case FPDU_MODE:
 		start_ep_timer(ep);
 		__state_set(&ep->com, CLOSING);
-		closing = 1;
+		attrs.next_state = C4IW_QP_STATE_CLOSING;
+		abort = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
 		peer_close_upcall(ep);
+		disconnect = 1;
 		break;
 	case ABORTING:
 		disconnect = 0;
@@ -1537,11 +1532,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 		BUG_ON(1);
 	}
 	mutex_unlock(&ep->com.mutex);
-	if (closing) {
-		attrs.next_state = C4IW_QP_STATE_CLOSING;
-		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
-			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
-	}
 	if (disconnect)
 		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
 	if (release)
@@ -1582,9 +1572,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
 	 */
-	ep->com.wr_wait.done = 1;
-	ep->com.wr_wait.ret = -ECONNRESET;
-	wake_up(&ep->com.wr_wait.wait);
+	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -1711,14 +1699,14 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
 	ep = lookup_tid(t, tid);
 	BUG_ON(!ep);
 
-	if (ep->com.qp) {
+	if (ep && ep->com.qp) {
 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
 		       ep->com.qp->wq.sq.qid);
 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
 		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
 	} else
-		printk(KERN_WARNING MOD "TERM received tid %u no qp\n", tid);
+		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
 
 	return 0;
 }
@@ -2297,14 +2285,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
 		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
 		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
-		if (wr_waitp) {
-			if (ret)
-				wr_waitp->ret = -ret;
-			else
-				wr_waitp->ret = 0;
-			wr_waitp->done = 1;
-			wake_up(&wr_waitp->wait);
-		}
+		if (wr_waitp)
+			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
 		kfree_skb(skb);
 		break;
 	case 2:
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index e29172c2afcb..40a13cc633a3 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-static LIST_HEAD(dev_list);
+static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
 static struct dentry *c4iw_debugfs_root;
@@ -370,18 +370,23 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 	c4iw_destroy_resource(&rdev->resource);
 }
 
-static void c4iw_remove(struct c4iw_dev *dev)
+struct uld_ctx {
+	struct list_head entry;
+	struct cxgb4_lld_info lldi;
+	struct c4iw_dev *dev;
+};
+
+static void c4iw_remove(struct uld_ctx *ctx)
 {
-	PDBG("%s c4iw_dev %p\n", __func__,  dev);
-	list_del(&dev->entry);
-	if (dev->registered)
-		c4iw_unregister_device(dev);
-	c4iw_rdev_close(&dev->rdev);
-	idr_destroy(&dev->cqidr);
-	idr_destroy(&dev->qpidr);
-	idr_destroy(&dev->mmidr);
-	iounmap(dev->rdev.oc_mw_kva);
-	ib_dealloc_device(&dev->ibdev);
+	PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
+	c4iw_unregister_device(ctx->dev);
+	c4iw_rdev_close(&ctx->dev->rdev);
+	idr_destroy(&ctx->dev->cqidr);
+	idr_destroy(&ctx->dev->qpidr);
+	idr_destroy(&ctx->dev->mmidr);
+	iounmap(ctx->dev->rdev.oc_mw_kva);
+	ib_dealloc_device(&ctx->dev->ibdev);
+	ctx->dev = NULL;
 }
 
 static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
@@ -392,7 +397,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 	if (!devp) {
 		printk(KERN_ERR MOD "Cannot allocate ib device\n");
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 	devp->rdev.lldi = *infop;
 
@@ -402,27 +407,23 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 	devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
 					       devp->rdev.lldi.vr->ocq.size);
 
-	printk(KERN_INFO MOD "ocq memory: "
+	PDBG(KERN_INFO MOD "ocq memory: "
 	       "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
 	       devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
 	       devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
 
-	mutex_lock(&dev_mutex);
-
 	ret = c4iw_rdev_open(&devp->rdev);
 	if (ret) {
 		mutex_unlock(&dev_mutex);
 		printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
 		ib_dealloc_device(&devp->ibdev);
-		return NULL;
+		return ERR_PTR(ret);
 	}
 
 	idr_init(&devp->cqidr);
 	idr_init(&devp->qpidr);
 	idr_init(&devp->mmidr);
 	spin_lock_init(&devp->lock);
-	list_add_tail(&devp->entry, &dev_list);
-	mutex_unlock(&dev_mutex);
 
 	if (c4iw_debugfs_root) {
 		devp->debugfs_root = debugfs_create_dir(
@@ -435,7 +436,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 
 static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
 {
-	struct c4iw_dev *dev;
+	struct uld_ctx *ctx;
 	static int vers_printed;
 	int i;
 
@@ -443,25 +444,33 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
 		printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n",
 		       DRV_VERSION);
 
-	dev = c4iw_alloc(infop);
-	if (!dev)
+	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+	if (!ctx) {
+		ctx = ERR_PTR(-ENOMEM);
 		goto out;
+	}
+	ctx->lldi = *infop;
 
 	PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
-	     __func__, pci_name(dev->rdev.lldi.pdev),
-	     dev->rdev.lldi.nchan, dev->rdev.lldi.nrxq,
-	     dev->rdev.lldi.ntxq, dev->rdev.lldi.nports);
+	     __func__, pci_name(ctx->lldi.pdev),
+	     ctx->lldi.nchan, ctx->lldi.nrxq,
+	     ctx->lldi.ntxq, ctx->lldi.nports);
+
+	mutex_lock(&dev_mutex);
+	list_add_tail(&ctx->entry, &uld_ctx_list);
+	mutex_unlock(&dev_mutex);
 
-	for (i = 0; i < dev->rdev.lldi.nrxq; i++)
-		PDBG("rxqid[%u] %u\n", i, dev->rdev.lldi.rxq_ids[i]);
+	for (i = 0; i < ctx->lldi.nrxq; i++)
+		PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
 out:
-	return dev;
+	return ctx;
 }
 
 static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
 			const struct pkt_gl *gl)
 {
-	struct c4iw_dev *dev = handle;
+	struct uld_ctx *ctx = handle;
+	struct c4iw_dev *dev = ctx->dev;
 	struct sk_buff *skb;
 	const struct cpl_act_establish *rpl;
 	unsigned int opcode;
@@ -503,47 +512,49 @@ nomem:
 
 static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 {
-	struct c4iw_dev *dev = handle;
+	struct uld_ctx *ctx = handle;
 
 	PDBG("%s new_state %u\n", __func__, new_state);
 	switch (new_state) {
 	case CXGB4_STATE_UP:
-		printk(KERN_INFO MOD "%s: Up\n", pci_name(dev->rdev.lldi.pdev));
-		if (!dev->registered) {
-			int ret;
-			ret = c4iw_register_device(dev);
-			if (ret)
+		printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+		if (!ctx->dev) {
+			int ret = 0;
+
+			ctx->dev = c4iw_alloc(&ctx->lldi);
+			if (!IS_ERR(ctx->dev))
+				ret = c4iw_register_device(ctx->dev);
+			if (IS_ERR(ctx->dev) || ret)
 				printk(KERN_ERR MOD
 				       "%s: RDMA registration failed: %d\n",
-				       pci_name(dev->rdev.lldi.pdev), ret);
+				       pci_name(ctx->lldi.pdev), ret);
 		}
 		break;
 	case CXGB4_STATE_DOWN:
 		printk(KERN_INFO MOD "%s: Down\n",
-		       pci_name(dev->rdev.lldi.pdev));
-		if (dev->registered)
-			c4iw_unregister_device(dev);
+		       pci_name(ctx->lldi.pdev));
+		if (ctx->dev)
+			c4iw_remove(ctx);
 		break;
 	case CXGB4_STATE_START_RECOVERY:
 		printk(KERN_INFO MOD "%s: Fatal Error\n",
-		       pci_name(dev->rdev.lldi.pdev));
-		dev->rdev.flags |= T4_FATAL_ERROR;
-		if (dev->registered) {
+		       pci_name(ctx->lldi.pdev));
+		if (ctx->dev) {
 			struct ib_event event;
 
+			ctx->dev->rdev.flags |= T4_FATAL_ERROR;
 			memset(&event, 0, sizeof event);
 			event.event  = IB_EVENT_DEVICE_FATAL;
-			event.device = &dev->ibdev;
+			event.device = &ctx->dev->ibdev;
 			ib_dispatch_event(&event);
-			c4iw_unregister_device(dev);
+			c4iw_remove(ctx);
 		}
 		break;
 	case CXGB4_STATE_DETACH:
 		printk(KERN_INFO MOD "%s: Detach\n",
-		       pci_name(dev->rdev.lldi.pdev));
-		mutex_lock(&dev_mutex);
-		c4iw_remove(dev);
-		mutex_unlock(&dev_mutex);
+		       pci_name(ctx->lldi.pdev));
+		if (ctx->dev)
+			c4iw_remove(ctx);
 		break;
 	}
 	return 0;
@@ -576,11 +587,13 @@ static int __init c4iw_init_module(void)
 
 static void __exit c4iw_exit_module(void)
 {
-	struct c4iw_dev *dev, *tmp;
+	struct uld_ctx *ctx, *tmp;
 
 	mutex_lock(&dev_mutex);
-	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
-		c4iw_remove(dev);
+	list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
+		if (ctx->dev)
+			c4iw_remove(ctx);
+		kfree(ctx);
 	}
 	mutex_unlock(&dev_mutex);
 	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 9f6166f59268..35d2a5dd9bb4 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -131,42 +131,58 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
 
 #define C4IW_WR_TO (10*HZ)
 
+enum {
+	REPLY_READY = 0,
+};
+
 struct c4iw_wr_wait {
 	wait_queue_head_t wait;
-	int done;
+	unsigned long status;
 	int ret;
 };
 
 static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
 {
 	wr_waitp->ret = 0;
-	wr_waitp->done = 0;
+	wr_waitp->status = 0;
 	init_waitqueue_head(&wr_waitp->wait);
 }
 
+static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
+{
+	wr_waitp->ret = ret;
+	set_bit(REPLY_READY, &wr_waitp->status);
+	wake_up(&wr_waitp->wait);
+}
+
 static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
 				 struct c4iw_wr_wait *wr_waitp,
 				 u32 hwtid, u32 qpid,
 				 const char *func)
 {
 	unsigned to = C4IW_WR_TO;
-	do {
+	int ret;
 
-		wait_event_timeout(wr_waitp->wait, wr_waitp->done, to);
-		if (!wr_waitp->done) {
+	do {
+		ret = wait_event_timeout(wr_waitp->wait,
+			test_and_clear_bit(REPLY_READY, &wr_waitp->status), to);
+		if (!ret) {
 			printk(KERN_ERR MOD "%s - Device %s not responding - "
 			       "tid %u qpid %u\n", func,
 			       pci_name(rdev->lldi.pdev), hwtid, qpid);
+			if (c4iw_fatal_error(rdev)) {
+				wr_waitp->ret = -EIO;
+				break;
+			}
 			to = to << 2;
 		}
-	} while (!wr_waitp->done);
+	} while (!ret);
 	if (wr_waitp->ret)
-		printk(KERN_WARNING MOD "%s: FW reply %d tid %u qpid %u\n",
-		       pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+		PDBG("%s: FW reply %d tid %u qpid %u\n",
+		     pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
 	return wr_waitp->ret;
 }
 
-
 struct c4iw_dev {
 	struct ib_device ibdev;
 	struct c4iw_rdev rdev;
@@ -175,9 +191,7 @@ struct c4iw_dev {
 	struct idr qpidr;
 	struct idr mmidr;
 	spinlock_t lock;
-	struct list_head entry;
 	struct dentry *debugfs_root;
-	u8 registered;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index f66dd8bf5128..5b9e4220ca08 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -516,7 +516,6 @@ int c4iw_register_device(struct c4iw_dev *dev)
 		if (ret)
 			goto bail2;
 	}
-	dev->registered = 1;
 	return 0;
 bail2:
 	ib_unregister_device(&dev->ibdev);
@@ -535,6 +534,5 @@ void c4iw_unregister_device(struct c4iw_dev *dev)
 				   c4iw_class_attributes[i]);
 	ib_unregister_device(&dev->ibdev);
 	kfree(dev->ibdev.iwcm);
-	dev->registered = 0;
 	return;
 }
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 70a5a3c646da..3b773b05a898 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -214,7 +214,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		V_FW_RI_RES_WR_HOSTFCMODE(0) |	/* no host cidx updates */
 		V_FW_RI_RES_WR_CPRIO(0) |	/* don't keep in chip cache */
 		V_FW_RI_RES_WR_PCIECHN(0) |	/* set by uP at ri_init time */
-		t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0 |
+		(t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) |
 		V_FW_RI_RES_WR_IQID(scq->cqid));
 	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
 		V_FW_RI_RES_WR_DCAEN(0) |
@@ -1210,7 +1210,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 			if (ret) {
 				if (internal)
 					c4iw_get_ep(&qhp->ep->com);
-				disconnect = abort = 1;
 				goto err;
 			}
 			break;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 24af12fc8228..c0221eec8817 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -269,11 +269,8 @@ struct t4_swsqe {
 
 static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
 {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
 	return pgprot_writecombine(prot);
-#elif defined(CONFIG_PPC64)
-	return __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE) &
-			~(pgprot_t)_PAGE_GUARDED);
 #else
 	return pgprot_noncached(prot);
 #endif
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 58c0e417bc30..be24ac726114 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -398,7 +398,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	struct ipath_devdata *dd;
 	unsigned long long addr;
 	u32 bar0 = 0, bar1 = 0;
-	u8 rev;
 
 	dd = ipath_alloc_devdata(pdev);
 	if (IS_ERR(dd)) {
@@ -540,13 +539,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 		goto bail_regions;
 	}
 
-	ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
-	if (ret) {
-		ipath_dev_err(dd, "Failed to read PCI revision ID unit "
-			      "%u: err %d\n", dd->ipath_unit, -ret);
-		goto bail_regions;	/* shouldn't ever happen */
-	}
-	dd->ipath_pcirev = rev;
+	dd->ipath_pcirev = pdev->revision;
 
 #if defined(__powerpc__)
 	/* There isn't a generic way to specify writethrough mappings */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 33c7eedaba6c..e74cdf9ef471 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2563,7 +2563,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
 	u16 last_ae;
 	u8 original_hw_tcp_state;
 	u8 original_ibqp_state;
-	enum iw_cm_event_status disconn_status = IW_CM_EVENT_STATUS_OK;
+	int disconn_status = 0;
 	int issue_disconn = 0;
 	int issue_close = 0;
 	int issue_flush = 0;
@@ -2605,7 +2605,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
 			(last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
 		issue_disconn = 1;
 		if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
-			disconn_status = IW_CM_EVENT_STATUS_RESET;
+			disconn_status = -ECONNRESET;
 	}
 
 	if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
@@ -2666,7 +2666,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
 			cm_id->provider_data = nesqp;
 			/* Send up the close complete event */
 			cm_event.event = IW_CM_EVENT_CLOSE;
-			cm_event.status = IW_CM_EVENT_STATUS_OK;
+			cm_event.status = 0;
 			cm_event.provider_data = cm_id->provider_data;
 			cm_event.local_addr = cm_id->local_addr;
 			cm_event.remote_addr = cm_id->remote_addr;
@@ -2966,7 +2966,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	nes_add_ref(&nesqp->ibqp);
 
 	cm_event.event = IW_CM_EVENT_ESTABLISHED;
-	cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+	cm_event.status = 0;
 	cm_event.provider_data = (void *)nesqp;
 	cm_event.local_addr = cm_id->local_addr;
 	cm_event.remote_addr = cm_id->remote_addr;
@@ -3377,7 +3377,7 @@ static void cm_event_connected(struct nes_cm_event *event)
 
 	/* notify OF layer we successfully created the requested connection */
 	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-	cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+	cm_event.status = 0;
 	cm_event.provider_data = cm_id->provider_data;
 	cm_event.local_addr.sin_family = AF_INET;
 	cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
@@ -3484,7 +3484,7 @@ static void cm_event_reset(struct nes_cm_event *event)
 	nesqp->cm_id = NULL;
 	/* cm_id->provider_data = NULL; */
 	cm_event.event = IW_CM_EVENT_DISCONNECT;
-	cm_event.status = IW_CM_EVENT_STATUS_RESET;
+	cm_event.status = -ECONNRESET;
 	cm_event.provider_data = cm_id->provider_data;
 	cm_event.local_addr = cm_id->local_addr;
 	cm_event.remote_addr = cm_id->remote_addr;
@@ -3495,7 +3495,7 @@ static void cm_event_reset(struct nes_cm_event *event)
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	atomic_inc(&cm_closes);
 	cm_event.event = IW_CM_EVENT_CLOSE;
-	cm_event.status = IW_CM_EVENT_STATUS_OK;
+	cm_event.status = 0;
 	cm_event.provider_data = cm_id->provider_data;
 	cm_event.local_addr = cm_id->local_addr;
 	cm_event.remote_addr = cm_id->remote_addr;
@@ -3534,7 +3534,7 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
 			cm_node, cm_id, jiffies);
 
 	cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-	cm_event.status = IW_CM_EVENT_STATUS_OK;
+	cm_event.status = 0;
 	cm_event.provider_data = (void *)cm_node;
 
 	cm_event.local_addr.sin_family = AF_INET;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 26d8018c0a7c..95ca93ceedac 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1484,7 +1484,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
 			(nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
 		cm_id = nesqp->cm_id;
 		cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-		cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT;
+		cm_event.status = -ETIMEDOUT;
 		cm_event.local_addr = cm_id->local_addr;
 		cm_event.remote_addr = cm_id->remote_addr;
 		cm_event.private_data = NULL;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 7de4b7ebffc5..d8ca0a0b970d 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1799,7 +1799,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
 	dd->int_counter = 0; /* so we check interrupts work again */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 74fe0360bec7..c765a2eb04cf 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2111,7 +2111,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
 	dd->int_counter = 0; /* so we check interrupts work again */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 55de3cf3441c..9f53e68a096a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3299,7 +3299,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR);
 	dd->flags |= QIB_DOING_RESET;
@@ -7534,7 +7534,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
 	ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10));
 	tstart = get_jiffies_64();
 	while (chan_done &&
-	       !time_after64(tstart, tstart + msecs_to_jiffies(500))) {
+	       !time_after64(get_jiffies_64(),
+			tstart + msecs_to_jiffies(500))) {
 		msleep(20);
 		for (chan = 0; chan < SERDES_CHANS; ++chan) {
 			rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 48b6674cbc49..891cc2ff5f00 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -526,11 +526,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
 	 */
 	devid = parent->device;
 	if (devid >= 0x25e2 && devid <= 0x25fa) {
-		u8 rev;
-
 		/* 5000 P/V/X/Z */
-		pci_read_config_byte(parent, PCI_REVISION_ID, &rev);
-		if (rev <= 0xb2)
+		if (parent->revision <= 0xb2)
 			bits = 1U << 10;
 		else
 			bits = 7U << 10;
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 7f42d3a454d2..88d8e4cb419a 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -39,13 +39,13 @@ struct evdev {
 };
 
 struct evdev_client {
-	int head;
-	int tail;
+	unsigned int head;
+	unsigned int tail;
 	spinlock_t buffer_lock; /* protects access to buffer, head and tail */
 	struct fasync_struct *fasync;
 	struct evdev *evdev;
 	struct list_head node;
-	int bufsize;
+	unsigned int bufsize;
 	struct input_event buffer[];
 };
 
@@ -55,16 +55,25 @@ static DEFINE_MUTEX(evdev_table_mutex);
 static void evdev_pass_event(struct evdev_client *client,
 			     struct input_event *event)
 {
-	/*
-	 * Interrupts are disabled, just acquire the lock.
-	 * Make sure we don't leave with the client buffer
-	 * "empty" by having client->head == client->tail.
-	 */
+	/* Interrupts are disabled, just acquire the lock. */
 	spin_lock(&client->buffer_lock);
-	do {
-		client->buffer[client->head++] = *event;
-		client->head &= client->bufsize - 1;
-	} while (client->head == client->tail);
+
+	client->buffer[client->head++] = *event;
+	client->head &= client->bufsize - 1;
+
+	if (unlikely(client->head == client->tail)) {
+		/*
+		 * This effectively "drops" all unconsumed events, leaving
+		 * EV_SYN/SYN_DROPPED plus the newest event in the queue.
+		 */
+		client->tail = (client->head - 2) & (client->bufsize - 1);
+
+		client->buffer[client->tail].time = event->time;
+		client->buffer[client->tail].type = EV_SYN;
+		client->buffer[client->tail].code = SYN_DROPPED;
+		client->buffer[client->tail].value = 0;
+	}
+
 	spin_unlock(&client->buffer_lock);
 
 	if (event->type == EV_SYN)
diff --git a/drivers/input/input.c b/drivers/input/input.c
index d6e8bd8a851c..ebbceedc92f4 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1746,6 +1746,42 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int
 }
 EXPORT_SYMBOL(input_set_capability);
 
+/* Estimate events per packet from the device's ABS/REL bits and MT slots. */
+static unsigned int input_estimate_events_per_packet(struct input_dev *dev)
+{
+	int mt_slots, i;
+	unsigned int events;
+
+	if (dev->mtsize) {
+		mt_slots = dev->mtsize;
+	} else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) {
+		mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum -
+			   dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1;
+		mt_slots = clamp(mt_slots, 2, 32); /* bound estimate to 2..32 */
+	} else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) {
+		mt_slots = 2;
+	} else {
+		mt_slots = 0;
+	}
+
+	events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */
+
+	for (i = 0; i < ABS_CNT; i++) {
+		if (test_bit(i, dev->absbit)) {
+			if (input_is_mt_axis(i))
+				events += mt_slots;
+			else
+				events++;
+		}
+	}
+
+	for (i = 0; i < REL_CNT; i++)
+		if (test_bit(i, dev->relbit))
+			events++;
+
+	return events;
+}
+
 #define INPUT_CLEANSE_BITMASK(dev, type, bits)				\
 	do {								\
 		if (!test_bit(EV_##type, dev->evbit))			\
@@ -1793,6 +1829,10 @@ int input_register_device(struct input_dev *dev)
 	/* Make sure that bitmasks not mentioned in dev->evbit are clean. */
 	input_cleanse_bitmasks(dev);
 
+	if (!dev->hint_events_per_packet)
+		dev->hint_events_per_packet =
+				input_estimate_events_per_packet(dev);
+
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
 	 * is handled by the driver itself and we don't do it in input.c.
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index 1839194ea987..10bcd4ae5402 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -223,8 +223,9 @@ static int __init atakbd_init(void)
 		return -ENODEV;
 
 	// need to init core driver if not already done so
-	if (atari_keyb_init())
-		return -ENODEV;
+	error = atari_keyb_init();
+	if (error)
+		return error;
 
 	atakbd_dev = input_allocate_device();
 	if (!atakbd_dev)
diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c
index 09bef79d9da1..a26922cf0e84 100644
--- a/drivers/input/keyboard/twl4030_keypad.c
+++ b/drivers/input/keyboard/twl4030_keypad.c
@@ -332,18 +332,20 @@ static int __devinit twl4030_kp_program(struct twl4030_keypad *kp)
 static int __devinit twl4030_kp_probe(struct platform_device *pdev)
 {
 	struct twl4030_keypad_data *pdata = pdev->dev.platform_data;
-	const struct matrix_keymap_data *keymap_data = pdata->keymap_data;
+	const struct matrix_keymap_data *keymap_data;
 	struct twl4030_keypad *kp;
 	struct input_dev *input;
 	u8 reg;
 	int error;
 
-	if (!pdata || !pdata->rows || !pdata->cols ||
+	if (!pdata || !pdata->rows || !pdata->cols || !pdata->keymap_data ||
 	    pdata->rows > TWL4030_MAX_ROWS || pdata->cols > TWL4030_MAX_COLS) {
 		dev_err(&pdev->dev, "Invalid platform_data\n");
 		return -EINVAL;
 	}
 
+	keymap_data = pdata->keymap_data;
+
 	kp = kzalloc(sizeof(*kp), GFP_KERNEL);
 	input = input_allocate_device();
 	if (!kp || !input) {
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 7077f9bf5ead..62bae99424e6 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -303,7 +303,7 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
 				   enum xenbus_state backend_state)
 {
 	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
-	int val;
+	int ret, val;
 
 	switch (backend_state) {
 	case XenbusStateInitialising:
@@ -316,6 +316,17 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
 
 	case XenbusStateInitWait:
 InitWait:
+		ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				   "feature-abs-pointer", "%d", &val);
+		if (ret < 0)
+			val = 0;
+		if (val) {
+			ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
+					    "request-abs-pointer", "1");
+			if (ret)
+				pr_warning("xenkbd: can't request abs-pointer");
+		}
+
 		xenbus_switch_state(dev, XenbusStateConnected);
 		break;
 
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index adf45b3040e9..5c4a692bf73a 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -77,15 +77,15 @@ static void atamouse_interrupt(char *buf)
 #endif
 
 	/* only relative events get here */
-	dx =  buf[1];
-	dy = -buf[2];
+	dx = buf[1];
+	dy = buf[2];
 
 	input_report_rel(atamouse_dev, REL_X, dx);
 	input_report_rel(atamouse_dev, REL_Y, dy);
 
-	input_report_key(atamouse_dev, BTN_LEFT,   buttons & 0x1);
+	input_report_key(atamouse_dev, BTN_LEFT,   buttons & 0x4);
 	input_report_key(atamouse_dev, BTN_MIDDLE, buttons & 0x2);
-	input_report_key(atamouse_dev, BTN_RIGHT,  buttons & 0x4);
+	input_report_key(atamouse_dev, BTN_RIGHT,  buttons & 0x1);
 
 	input_sync(atamouse_dev);
 
@@ -108,7 +108,7 @@ static int atamouse_open(struct input_dev *dev)
 static void atamouse_close(struct input_dev *dev)
 {
 	ikbd_mouse_disable();
-	atari_mouse_interrupt_hook = NULL;
+	atari_input_mouse_interrupt_hook = NULL;
 }
 
 static int __init atamouse_init(void)
@@ -118,8 +118,9 @@ static int __init atamouse_init(void)
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP))
 		return -ENODEV;
 
-	if (!atari_keyb_init())
-		return -ENODEV;
+	error = atari_keyb_init();
+	if (error)
+		return error;
 
 	atamouse_dev = input_allocate_device();
 	if (!atamouse_dev)
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index c24946f51256..1de1c19dad30 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -281,17 +281,24 @@ struct ser_req {
 	u8			command;
 	u8			ref_off;
 	u16			scratch;
-	__be16			sample;
 	struct spi_message	msg;
 	struct spi_transfer	xfer[6];
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	__be16 sample ____cacheline_aligned;
 };
 
 struct ads7845_ser_req {
 	u8			command[3];
-	u8			pwrdown[3];
-	u8			sample[3];
 	struct spi_message	msg;
 	struct spi_transfer	xfer[2];
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	u8 sample[3] ____cacheline_aligned;
 };
 
 static int ads7846_read12_ser(struct device *dev, unsigned command)
diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c
index efa06882de00..45f93d0f5592 100644
--- a/drivers/input/touchscreen/h3600_ts_input.c
+++ b/drivers/input/touchscreen/h3600_ts_input.c
@@ -399,31 +399,34 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
 			IRQF_SHARED | IRQF_DISABLED, "h3600_action", &ts->dev)) {
 		printk(KERN_ERR "h3600ts.c: Could not allocate Action Button IRQ!\n");
 		err = -EBUSY;
-		goto fail2;
+		goto fail1;
 	}
 
 	if (request_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, npower_button_handler,
 			IRQF_SHARED | IRQF_DISABLED, "h3600_suspend", &ts->dev)) {
 		printk(KERN_ERR "h3600ts.c: Could not allocate Power Button IRQ!\n");
 		err = -EBUSY;
-		goto fail3;
+		goto fail2;
 	}
 
 	serio_set_drvdata(serio, ts);
 
 	err = serio_open(serio, drv);
 	if (err)
-		return err;
+		goto fail3;
 
 	//h3600_flite_control(1, 25);     /* default brightness */
-	input_register_device(ts->dev);
+	err = input_register_device(ts->dev);
+	if (err)
+		goto fail4;
 
 	return 0;
 
-fail3:	free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
+fail4:	serio_close(serio);
+fail3:	serio_set_drvdata(serio, NULL);
+	free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev);
 fail2:	free_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, ts->dev);
-fail1:	serio_set_drvdata(serio, NULL);
-	input_free_device(input_dev);
+fail1:	input_free_device(input_dev);
 	kfree(ts);
 	return err;
 }
diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c
index 6ae054f8e0aa..9175d49d2546 100644
--- a/drivers/input/touchscreen/wm831x-ts.c
+++ b/drivers/input/touchscreen/wm831x-ts.c
@@ -68,8 +68,23 @@ struct wm831x_ts {
 	unsigned int pd_irq;
 	bool pressure;
 	bool pen_down;
+	struct work_struct pd_data_work;
 };
 
+static void wm831x_pd_data_work(struct work_struct *work)
+{
+	struct wm831x_ts *wm831x_ts =
+		container_of(work, struct wm831x_ts, pd_data_work);
+
+	if (wm831x_ts->pen_down) {	/* pen is down: enable the data IRQ */
+		enable_irq(wm831x_ts->data_irq);
+		dev_dbg(wm831x_ts->wm831x->dev, "IRQ PD->DATA done\n");
+	} else {			/* pen is up: enable the pen-down IRQ */
+		enable_irq(wm831x_ts->pd_irq);
+		dev_dbg(wm831x_ts->wm831x->dev, "IRQ DATA->PD done\n");
+	}
+}
+
 static irqreturn_t wm831x_ts_data_irq(int irq, void *irq_data)
 {
 	struct wm831x_ts *wm831x_ts = irq_data;
@@ -110,6 +125,9 @@ static irqreturn_t wm831x_ts_data_irq(int irq, void *irq_data)
 	}
 
 	if (!wm831x_ts->pen_down) {
+		/* Switch from data to pen down */
+		dev_dbg(wm831x->dev, "IRQ DATA->PD\n");
+
 		disable_irq_nosync(wm831x_ts->data_irq);
 
 		/* Don't need data any more */
@@ -128,6 +146,10 @@ static irqreturn_t wm831x_ts_data_irq(int irq, void *irq_data)
 					 ABS_PRESSURE, 0);
 
 		input_report_key(wm831x_ts->input_dev, BTN_TOUCH, 0);
+
+		schedule_work(&wm831x_ts->pd_data_work);
+	} else {
+		input_report_key(wm831x_ts->input_dev, BTN_TOUCH, 1);
 	}
 
 	input_sync(wm831x_ts->input_dev);
@@ -141,6 +163,11 @@ static irqreturn_t wm831x_ts_pen_down_irq(int irq, void *irq_data)
 	struct wm831x *wm831x = wm831x_ts->wm831x;
 	int ena = 0;
 
+	if (wm831x_ts->pen_down)
+		return IRQ_HANDLED;
+
+	disable_irq_nosync(wm831x_ts->pd_irq);
+
 	/* Start collecting data */
 	if (wm831x_ts->pressure)
 		ena |= WM831X_TCH_Z_ENA;
@@ -149,14 +176,14 @@ static irqreturn_t wm831x_ts_pen_down_irq(int irq, void *irq_data)
 			WM831X_TCH_X_ENA | WM831X_TCH_Y_ENA | WM831X_TCH_Z_ENA,
 			WM831X_TCH_X_ENA | WM831X_TCH_Y_ENA | ena);
 
-	input_report_key(wm831x_ts->input_dev, BTN_TOUCH, 1);
-	input_sync(wm831x_ts->input_dev);
-
 	wm831x_set_bits(wm831x, WM831X_INTERRUPT_STATUS_1,
 			WM831X_TCHPD_EINT, WM831X_TCHPD_EINT);
 
 	wm831x_ts->pen_down = true;
-	enable_irq(wm831x_ts->data_irq);
+
+	/* Switch from pen down to data */
+	dev_dbg(wm831x->dev, "IRQ PD->DATA\n");
+	schedule_work(&wm831x_ts->pd_data_work);
 
 	return IRQ_HANDLED;
 }
@@ -182,13 +209,28 @@ static void wm831x_ts_input_close(struct input_dev *idev)
 	struct wm831x_ts *wm831x_ts = input_get_drvdata(idev);
 	struct wm831x *wm831x = wm831x_ts->wm831x;
 
+	/* Shut the controller down, disabling all other functionality too */
 	wm831x_set_bits(wm831x, WM831X_TOUCH_CONTROL_1,
-			WM831X_TCH_ENA | WM831X_TCH_CVT_ENA |
-			WM831X_TCH_X_ENA | WM831X_TCH_Y_ENA |
-			WM831X_TCH_Z_ENA, 0);
+			WM831X_TCH_ENA | WM831X_TCH_X_ENA |
+			WM831X_TCH_Y_ENA | WM831X_TCH_Z_ENA, 0);
 
-	if (wm831x_ts->pen_down)
+	/* Make sure any pending IRQs are done, the above will prevent
+	 * new ones firing.
+	 */
+	synchronize_irq(wm831x_ts->data_irq);
+	synchronize_irq(wm831x_ts->pd_irq);
+
+	/* Make sure the IRQ completion work is quiesced */
+	flush_work_sync(&wm831x_ts->pd_data_work);
+
+	/* If we ended up with the pen down then make sure we revert back
+	 * to pen detection state for the next time we start up.
+	 */
+	if (wm831x_ts->pen_down) {
 		disable_irq(wm831x_ts->data_irq);
+		enable_irq(wm831x_ts->pd_irq);
+		wm831x_ts->pen_down = false;
+	}
 }
 
 static __devinit int wm831x_ts_probe(struct platform_device *pdev)
@@ -198,7 +240,7 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
 	struct wm831x_pdata *core_pdata = dev_get_platdata(pdev->dev.parent);
 	struct wm831x_touch_pdata *pdata = NULL;
 	struct input_dev *input_dev;
-	int error;
+	int error, irqf;
 
 	if (core_pdata)
 		pdata = core_pdata->touch;
@@ -212,6 +254,7 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
 
 	wm831x_ts->wm831x = wm831x;
 	wm831x_ts->input_dev = input_dev;
+	INIT_WORK(&wm831x_ts->pd_data_work, wm831x_pd_data_work);
 
 	/*
 	 * If we have a direct IRQ use it, otherwise use the interrupt
@@ -270,9 +313,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
 	wm831x_set_bits(wm831x, WM831X_TOUCH_CONTROL_1,
 			WM831X_TCH_RATE_MASK, 6);
 
+	if (pdata && pdata->data_irqf)
+		irqf = pdata->data_irqf;
+	else
+		irqf = IRQF_TRIGGER_HIGH;
+
 	error = request_threaded_irq(wm831x_ts->data_irq,
 				     NULL, wm831x_ts_data_irq,
-				     IRQF_ONESHOT,
+				     irqf | IRQF_ONESHOT,
 				     "Touchscreen data", wm831x_ts);
 	if (error) {
 		dev_err(&pdev->dev, "Failed to request data IRQ %d: %d\n",
@@ -281,9 +329,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
 	}
 	disable_irq(wm831x_ts->data_irq);
 
+	if (pdata && pdata->pd_irqf)
+		irqf = pdata->pd_irqf;
+	else
+		irqf = IRQF_TRIGGER_HIGH;
+
 	error = request_threaded_irq(wm831x_ts->pd_irq,
 				     NULL, wm831x_ts_pen_down_irq,
-				     IRQF_ONESHOT,
+				     irqf | IRQF_ONESHOT,
 				     "Touchscreen pen down", wm831x_ts);
 	if (error) {
 		dev_err(&pdev->dev, "Failed to request pen down IRQ %d: %d\n",
diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c
index e7089a1f6cb6..b37e6186d0fa 100644
--- a/drivers/leds/leds-lm3530.c
+++ b/drivers/leds/leds-lm3530.c
@@ -349,6 +349,7 @@ static const struct i2c_device_id lm3530_id[] = {
 	{LM3530_NAME, 0},
 	{}
 };
+MODULE_DEVICE_TABLE(i2c, lm3530_id);
 
 static struct i2c_driver lm3530_i2c_driver = {
 	.probe = lm3530_probe,
diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c
index 3790816643be..8497f56f8e46 100644
--- a/drivers/leds/leds-regulator.c
+++ b/drivers/leds/leds-regulator.c
@@ -178,6 +178,10 @@ static int __devinit regulator_led_probe(struct platform_device *pdev)
 	led->cdev.flags |= LED_CORE_SUSPENDRESUME;
 	led->vcc = vcc;
 
+	/* to handle correctly an already enabled regulator */
+	if (regulator_is_enabled(led->vcc))
+		led->enabled = 1;
+
 	mutex_init(&led->mutex);
 	INIT_WORK(&led->work, led_work);
 
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 0aaa0597a622..34ae49dc557c 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -5,8 +5,10 @@ config LGUEST
 	---help---
 	  This is a very simple module which allows you to run
 	  multiple instances of the same Linux kernel, using the
-	  "lguest" command found in the Documentation/lguest directory.
+	  "lguest" command found in the Documentation/virtual/lguest
+	  directory.
+
 	  Note that "lguest" is pronounced to rhyme with "fell quest",
-	  not "rustyvisor".  See Documentation/lguest/lguest.txt.
+	  not "rustyvisor". See Documentation/virtual/lguest/lguest.txt.
 
 	  If unsure, say N.  If curious, say M.  If masochistic, say Y.
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 7d463c26124f..8ac947c7e7c7 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -18,7 +18,7 @@ Mastery: PREFIX=M
 Beer:
 	@for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}"
 Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery:
-	@sh ../../Documentation/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
+	@sh ../../Documentation/virtual/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
 Puppy:
 	@clear
 	@printf "      __  \n (___()'\`;\n /,    /\`\n \\\\\\\"--\\\\\\   \n"
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 8b021eb0d48c..6cccd60c594e 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -40,7 +40,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/freezer.h>
 #include <linux/syscalls.h>
 #include <linux/suspend.h>
@@ -2527,12 +2527,9 @@ void pmu_blink(int n)
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
 int pmu_sys_suspended;
 
-static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+static int pmu_syscore_suspend(void)
 {
-	if (state.event != PM_EVENT_SUSPEND || pmu_sys_suspended)
-		return 0;
-
-	/* Suspend PMU event interrupts */\
+	/* Suspend PMU event interrupts */
 	pmu_suspend();
 	pmu_sys_suspended = 1;
 
@@ -2544,12 +2541,12 @@ static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int pmu_sys_resume(struct sys_device *sysdev)
+static void pmu_syscore_resume(void)
 {
 	struct adb_request req;
 
 	if (!pmu_sys_suspended)
-		return 0;
+		return;
 
 	/* Tell PMU we are ready */
 	pmu_request(&req, NULL, 2, PMU_SYSTEM_READY, 2);
@@ -2562,50 +2559,21 @@ static int pmu_sys_resume(struct sys_device *sysdev)
 	/* Resume PMU event interrupts */
 	pmu_resume();
 	pmu_sys_suspended = 0;
-
-	return 0;
 }
 
-#endif /* CONFIG_SUSPEND && CONFIG_PPC32 */
-
-static struct sysdev_class pmu_sysclass = {
-	.name = "pmu",
-};
-
-static struct sys_device device_pmu = {
-	.cls		= &pmu_sysclass,
-};
-
-static struct sysdev_driver driver_pmu = {
-#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
-	.suspend	= &pmu_sys_suspend,
-	.resume		= &pmu_sys_resume,
-#endif /* CONFIG_SUSPEND && CONFIG_PPC32 */
+static struct syscore_ops pmu_syscore_ops = {
+	.suspend = pmu_syscore_suspend,
+	.resume = pmu_syscore_resume,
 };
 
-static int __init init_pmu_sysfs(void)
+static int pmu_syscore_register(void)
 {
-	int rc;
+	register_syscore_ops(&pmu_syscore_ops);
 
-	rc = sysdev_class_register(&pmu_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&device_pmu);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys device\n");
-		return -ENODEV;
-	}
-	rc = sysdev_driver_register(&pmu_sysclass, &driver_pmu);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys driver\n");
-		return -ENODEV;
-	}
 	return 0;
 }
-
-subsys_initcall(init_pmu_sysfs);
+subsys_initcall(pmu_syscore_register);
+#endif /* CONFIG_SUSPEND && CONFIG_PPC32 */
 
 EXPORT_SYMBOL(pmu_request);
 EXPORT_SYMBOL(pmu_queue_request);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5ef136cdba91..e5d8904fc8f6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 	return md_raid5_congested(&rs->md, bits);
 }
 
-static void raid_unplug(struct dm_target_callbacks *cb)
-{
-	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-	md_raid5_kick_device(rs->md.private);
-}
-
 /*
  * Construct a RAID4/5/6 mapping:
  * Args:
@@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	rs->callbacks.congested_fn = raid_is_congested;
-	rs->callbacks.unplug_fn = raid_unplug;
 	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
 	return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b12b3776c0c0..7d6f7f18a920 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -447,48 +447,59 @@ EXPORT_SYMBOL(md_flush_request);
 
 /* Support for plugging.
  * This mirrors the plugging support in request_queue, but does not
- * require having a whole queue
+ * require having a whole queue or request structures.
+ * We allocate an md_plug_cb for each md device and each thread it gets
+ * plugged on.  This links to the private plug_handle structure in the
+ * personality data where we keep a count of the number of outstanding
+ * plugs so other code can see if a plug is active.
  */
-static void plugger_work(struct work_struct *work)
-{
-	struct plug_handle *plug =
-		container_of(work, struct plug_handle, unplug_work);
-	plug->unplug_fn(plug);
-}
-static void plugger_timeout(unsigned long data)
-{
-	struct plug_handle *plug = (void *)data;
-	kblockd_schedule_work(NULL, &plug->unplug_work);
-}
-void plugger_init(struct plug_handle *plug,
-		  void (*unplug_fn)(struct plug_handle *))
-{
-	plug->unplug_flag = 0;
-	plug->unplug_fn = unplug_fn;
-	init_timer(&plug->unplug_timer);
-	plug->unplug_timer.function = plugger_timeout;
-	plug->unplug_timer.data = (unsigned long)plug;
-	INIT_WORK(&plug->unplug_work, plugger_work);
-}
-EXPORT_SYMBOL_GPL(plugger_init);
+struct md_plug_cb {
+	struct blk_plug_cb cb;
+	mddev_t *mddev;
+};
 
-void plugger_set_plug(struct plug_handle *plug)
+static void plugger_unplug(struct blk_plug_cb *cb)
 {
-	if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
-		mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
+	struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb);
+	if (atomic_dec_and_test(&mdcb->mddev->plug_cnt))
+		md_wakeup_thread(mdcb->mddev->thread);
+	kfree(mdcb);
 }
-EXPORT_SYMBOL_GPL(plugger_set_plug);
 
-int plugger_remove_plug(struct plug_handle *plug)
+/* Check that an unplug wakeup will come shortly.
+ * If not, wakeup the md thread immediately
+ */
+int mddev_check_plugged(mddev_t *mddev)
 {
-	if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
-		del_timer(&plug->unplug_timer);
-		return 1;
-	} else
+	struct blk_plug *plug = current->plug;
+	struct md_plug_cb *mdcb;
+
+	if (!plug)
 		return 0;
-}
-EXPORT_SYMBOL_GPL(plugger_remove_plug);
 
+	list_for_each_entry(mdcb, &plug->cb_list, cb.list) {
+		if (mdcb->cb.callback == plugger_unplug &&
+		    mdcb->mddev == mddev) {
+			/* Already on the list, move to top */
+			if (mdcb != list_first_entry(&plug->cb_list,
+						    struct md_plug_cb,
+						    cb.list))
+				list_move(&mdcb->cb.list, &plug->cb_list);
+			return 1;
+		}
+	}
+	/* Not currently on the callback list */
+	mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC);
+	if (!mdcb)
+		return 0;
+
+	mdcb->mddev = mddev;
+	mdcb->cb.callback = plugger_unplug;
+	atomic_inc(&mddev->plug_cnt);
+	list_add(&mdcb->cb.list, &plug->cb_list);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(mddev_check_plugged);
 
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
@@ -538,6 +549,7 @@ void mddev_init(mddev_t *mddev)
 	atomic_set(&mddev->active, 1);
 	atomic_set(&mddev->openers, 0);
 	atomic_set(&mddev->active_io, 0);
+	atomic_set(&mddev->plug_cnt, 0);
 	spin_lock_init(&mddev->write_lock);
 	atomic_set(&mddev->flush_pending, 0);
 	init_waitqueue_head(&mddev->sb_wait);
@@ -3158,6 +3170,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev->layout = mddev->new_layout;
 	mddev->chunk_sectors = mddev->new_chunk_sectors;
 	mddev->delta_disks = 0;
+	mddev->degraded = 0;
 	if (mddev->pers->sync_request == NULL) {
 		/* this is now an array without redundancy, so
 		 * it must always be in_sync
@@ -4723,7 +4736,6 @@ static void md_clean(mddev_t *mddev)
 	mddev->bitmap_info.chunksize = 0;
 	mddev->bitmap_info.daemon_sleep = 0;
 	mddev->bitmap_info.max_write_behind = 0;
-	mddev->plug = NULL;
 }
 
 static void __md_stop_writes(mddev_t *mddev)
@@ -6688,12 +6700,6 @@ int md_allow_write(mddev_t *mddev)
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
-void md_unplug(mddev_t *mddev)
-{
-	if (mddev->plug)
-		mddev->plug->unplug_fn(mddev->plug);
-}
-
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
 void md_do_sync(mddev_t *mddev)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 52b407369e13..0b1fd3f1d85b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,26 +29,6 @@
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
-/* generic plugging support - like that provided with request_queue,
- * but does not require a request_queue
- */
-struct plug_handle {
-	void			(*unplug_fn)(struct plug_handle *);
-	struct timer_list	unplug_timer;
-	struct work_struct	unplug_work;
-	unsigned long		unplug_flag;
-};
-#define	PLUGGED_FLAG 1
-void plugger_init(struct plug_handle *plug,
-		  void (*unplug_fn)(struct plug_handle *));
-void plugger_set_plug(struct plug_handle *plug);
-int plugger_remove_plug(struct plug_handle *plug);
-static inline void plugger_flush(struct plug_handle *plug)
-{
-	del_timer_sync(&plug->unplug_timer);
-	cancel_work_sync(&plug->unplug_work);
-}
-
 /*
  * MD's 'extended' device
  */
@@ -199,6 +179,9 @@ struct mddev_s
 	int				delta_disks, new_level, new_layout;
 	int				new_chunk_sectors;
 
+	atomic_t			plug_cnt;	/* If device is expecting
+							 * more bios soon.
+							 */
 	struct mdk_thread_s		*thread;	/* management thread */
 	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
 	sector_t			curr_resync;	/* last block scheduled */
@@ -336,7 +319,6 @@ struct mddev_s
 	struct list_head		all_mddevs;
 
 	struct attribute_group		*to_remove;
-	struct plug_handle		*plug; /* if used by personality */
 
 	struct bio_set			*bio_set;
 
@@ -516,7 +498,6 @@ extern int md_integrity_register(mddev_t *mddev);
 extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
 extern void restore_bitmap_write_access(struct file *file);
-extern void md_unplug(mddev_t *mddev);
 
 extern void mddev_init(mddev_t *mddev);
 extern int md_run(mddev_t *mddev);
@@ -530,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 				   mddev_t *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   mddev_t *mddev);
+extern int mddev_check_plugged(mddev_t *mddev);
 #endif /* _MD_MD_H */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c2a21ae56d97..2b7a7ff401dc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf)
 		spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-	blk_flush_plug(current);
-	md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, md_kick_device(conf->mddev));
+			    conf->resync_lock, );
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, md_kick_device(conf->mddev));
+			    conf->resync_lock, );
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf)
 		conf->nr_waiting++;
 		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
 				    conf->resync_lock,
-				    md_kick_device(conf->mddev));
+				    );
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf)
 	wait_event_lock_irq(conf->wait_barrier,
 			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    ({ flush_pending_writes(conf);
-			       md_kick_device(conf->mddev); }));
+			    flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(conf_t *conf)
@@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	mdk_rdev_t *blocked_rdev;
+	int plugged;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
 	 */
+	plugged = mddev_check_plugged(mddev);
+
 	disks = conf->raid_disks;
  retry_write:
 	blocked_rdev = NULL;
@@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
-	if (do_sync || !bitmap)
+	if (do_sync || !bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
 
 	return 0;
@@ -1516,13 +1512,16 @@ static void raid1d(mddev_t *mddev)
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
 	mdk_rdev_t *rdev;
+	struct blk_plug plug;
 
 	md_check_recovery(mddev);
-	
+
+	blk_start_plug(&plug);
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 
-		flush_pending_writes(conf);
+		if (atomic_read(&mddev->plug_cnt) == 0)
+			flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -1593,6 +1592,7 @@ static void raid1d(mddev_t *mddev)
 		}
 		cond_resched();
 	}
+	blk_finish_plug(&plug);
 }
 
 
@@ -2039,7 +2039,6 @@ static int stop(mddev_t *mddev)
 
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
-	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	if (conf->r1bio_pool)
 		mempool_destroy(conf->r1bio_pool);
 	kfree(conf->mirrors);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2da83d566592..8e9462626ec5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf)
 		spin_unlock_irq(&conf->device_lock);
 }
 
-static void md_kick_device(mddev_t *mddev)
-{
-	blk_flush_plug(current);
-	md_wakeup_thread(mddev->thread);
-}
-
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, md_kick_device(conf->mddev));
+			    conf->resync_lock, );
 
 	/* block any new IO from starting */
 	conf->barrier++;
 
-	/* No wait for all pending IO to complete */
+	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, md_kick_device(conf->mddev));
+			    conf->resync_lock, );
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf)
 		conf->nr_waiting++;
 		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
 				    conf->resync_lock,
-				    md_kick_device(conf->mddev));
+				    );
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf)
 	wait_event_lock_irq(conf->wait_barrier,
 			    conf->nr_pending == conf->nr_queued+1,
 			    conf->resync_lock,
-			    ({ flush_pending_writes(conf);
-			       md_kick_device(conf->mddev); }));
+			    flush_pending_writes(conf));
+
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
 	unsigned long flags;
 	mdk_rdev_t *blocked_rdev;
+	int plugged;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
@@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
 	 */
+	plugged = mddev_check_plugged(mddev);
+
 	raid10_find_phys(conf, r10_bio);
  retry_write:
 	blocked_rdev = NULL;
@@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
-	if (do_sync || !mddev->bitmap)
+	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
-
 	return 0;
 }
 
@@ -1640,9 +1636,11 @@ static void raid10d(mddev_t *mddev)
 	conf_t *conf = mddev->private;
 	struct list_head *head = &conf->retry_list;
 	mdk_rdev_t *rdev;
+	struct blk_plug plug;
 
 	md_check_recovery(mddev);
 
+	blk_start_plug(&plug);
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 
@@ -1716,6 +1714,7 @@ static void raid10d(mddev_t *mddev)
 		}
 		cond_resched();
 	}
+	blk_finish_plug(&plug);
 }
 
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e867ee42b152..49bf5f891435 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -27,12 +27,12 @@
  *
  * We group bitmap updates into batches.  Each batch has a number.
  * We may write out several batches at once, but that isn't very important.
- * conf->bm_write is the number of the last batch successfully written.
- * conf->bm_flush is the number of the last batch that was closed to
+ * conf->seq_write is the number of the last batch successfully written.
+ * conf->seq_flush is the number of the last batch that was closed to
  *    new additions.
  * When we discover that we will need to write to any block in a stripe
  * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
- * the number of the batch it will be in. This is bm_flush+1.
+ * the number of the batch it will be in. This is seq_flush+1.
  * When we are ready to do a write, if that batch hasn't been written yet,
  *   we plug the array and queue the stripe for later.
  * When an unplug happens, we increment bm_flush, thus closing the current
@@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 		BUG_ON(!list_empty(&sh->lru));
 		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
-			if (test_bit(STRIPE_DELAYED, &sh->state)) {
+			if (test_bit(STRIPE_DELAYED, &sh->state))
 				list_add_tail(&sh->lru, &conf->delayed_list);
-				plugger_set_plug(&conf->plug);
-			} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-				   sh->bm_seq - conf->seq_write > 0) {
+			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+				   sh->bm_seq - conf->seq_write > 0)
 				list_add_tail(&sh->lru, &conf->bitmap_list);
-				plugger_set_plug(&conf->plug);
-			} else {
+			else {
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
 			}
@@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
 						    conf->device_lock,
-						    md_raid5_kick_device(conf));
+						    );
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
 				    conf->device_lock,
-				    blk_flush_plug(current));
+				    );
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
 				atomic_inc(&conf->preread_active_stripes);
 			list_add_tail(&sh->lru, &conf->hold_list);
 		}
-	} else
-		plugger_set_plug(&conf->plug);
+	}
 }
 
 static void activate_bit_delay(raid5_conf_t *conf)
@@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf)
 	}
 }
 
-void md_raid5_kick_device(raid5_conf_t *conf)
-{
-	blk_flush_plug(current);
-	raid5_activate_delayed(conf);
-	md_wakeup_thread(conf->mddev->thread);
-}
-EXPORT_SYMBOL_GPL(md_raid5_kick_device);
-
-static void raid5_unplug(struct plug_handle *plug)
-{
-	raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
-
-	md_raid5_kick_device(conf);
-}
-
 int md_raid5_congested(mddev_t *mddev, int bits)
 {
 	raid5_conf_t *conf = mddev->private;
@@ -3945,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
 	int remaining;
+	int plugged;
 
 	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bi);
@@ -3963,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
 
+	plugged = mddev_check_plugged(mddev);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int disks, data_disks;
@@ -4057,7 +4041,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 				 * add failed due to overlap.  Flush everything
 				 * and wait a while
 				 */
-				md_raid5_kick_device(conf);
+				md_wakeup_thread(mddev->thread);
 				release_stripe(sh);
 				schedule();
 				goto retry;
@@ -4077,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 		}
 			
 	}
+	if (!plugged)
+		md_wakeup_thread(mddev->thread);
+
 	spin_lock_irq(&conf->device_lock);
 	remaining = raid5_dec_bi_phys_segments(bi);
 	spin_unlock_irq(&conf->device_lock);
@@ -4478,24 +4465,30 @@ static void raid5d(mddev_t *mddev)
 	struct stripe_head *sh;
 	raid5_conf_t *conf = mddev->private;
 	int handled;
+	struct blk_plug plug;
 
 	pr_debug("+++ raid5d active\n");
 
 	md_check_recovery(mddev);
 
+	blk_start_plug(&plug);
 	handled = 0;
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
 
-		if (conf->seq_flush != conf->seq_write) {
-			int seq = conf->seq_flush;
+		if (atomic_read(&mddev->plug_cnt) == 0 &&
+		    !list_empty(&conf->bitmap_list)) {
+			/* Now is a good time to flush some bitmap updates */
+			conf->seq_flush++;
 			spin_unlock_irq(&conf->device_lock);
 			bitmap_unplug(mddev->bitmap);
 			spin_lock_irq(&conf->device_lock);
-			conf->seq_write = seq;
+			conf->seq_write = conf->seq_flush;
 			activate_bit_delay(conf);
 		}
+		if (atomic_read(&mddev->plug_cnt) == 0)
+			raid5_activate_delayed(conf);
 
 		while ((bio = remove_bio_from_retry(conf))) {
 			int ok;
@@ -4525,6 +4518,7 @@ static void raid5d(mddev_t *mddev)
 	spin_unlock_irq(&conf->device_lock);
 
 	async_tx_issue_pending_all();
+	blk_finish_plug(&plug);
 
 	pr_debug("--- raid5d inactive\n");
 }
@@ -5141,8 +5135,6 @@ static int run(mddev_t *mddev)
 		       mdname(mddev));
 	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
-	plugger_init(&conf->plug, raid5_unplug);
-	mddev->plug = &conf->plug;
 	if (mddev->queue) {
 		int chunk_size;
 		/* read-ahead size must cover two whole stripes, which
@@ -5159,7 +5151,6 @@ static int run(mddev_t *mddev)
 
 		mddev->queue->backing_dev_info.congested_data = mddev;
 		mddev->queue->backing_dev_info.congested_fn = raid5_congested;
-		mddev->queue->queue_lock = &conf->device_lock;
 
 		chunk_size = mddev->chunk_sectors << 9;
 		blk_queue_io_min(mddev->queue, chunk_size);
@@ -5192,7 +5183,6 @@ static int stop(mddev_t *mddev)
 	mddev->thread = NULL;
 	if (mddev->queue)
 		mddev->queue->backing_dev_info.congested_fn = NULL;
-	plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
 	free_conf(conf);
 	mddev->private = NULL;
 	mddev->to_remove = &raid5_attrs_group;
@@ -5688,6 +5678,7 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 static void *raid45_takeover_raid0(mddev_t *mddev, int level)
 {
 	struct raid0_private_data *raid0_priv = mddev->private;
+	sector_t sectors;
 
 	/* for raid0 takeover only one zone is supported */
 	if (raid0_priv->nr_strip_zones > 1) {
@@ -5696,6 +5687,9 @@ static void *raid45_takeover_raid0(mddev_t *mddev, int level)
 		return ERR_PTR(-EINVAL);
 	}
 
+	sectors = raid0_priv->strip_zone[0].zone_end;
+	sector_div(sectors, raid0_priv->strip_zone[0].nb_dev);
+	mddev->dev_sectors = sectors;
 	mddev->new_level = level;
 	mddev->new_layout = ALGORITHM_PARITY_N;
 	mddev->new_chunk_sectors = mddev->chunk_sectors;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 8d563a4f022a..3ca77a2613ba 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -400,8 +400,6 @@ struct raid5_private_data {
 					    * Cleared when a sync completes.
 					    */
 
-	struct plug_handle	plug;
-
 	/* per cpu variables */
 	struct raid5_percpu {
 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
diff --git a/drivers/media/common/tuners/tda18271-common.c b/drivers/media/common/tuners/tda18271-common.c
index 5466d47db899..aae40e52af5b 100644
--- a/drivers/media/common/tuners/tda18271-common.c
+++ b/drivers/media/common/tuners/tda18271-common.c
@@ -533,16 +533,7 @@ int tda18271_calc_main_pll(struct dvb_frontend *fe, u32 freq)
 	if (tda_fail(ret))
 		goto fail;
 
-	regs[R_MPD]   = (0x77 & pd);
-
-	switch (priv->mode) {
-	case TDA18271_ANALOG:
-		regs[R_MPD]  &= ~0x08;
-		break;
-	case TDA18271_DIGITAL:
-		regs[R_MPD]  |=  0x08;
-		break;
-	}
+	regs[R_MPD]   = (0x7f & pd);
 
 	div =  ((d * (freq / 1000)) << 7) / 125;
 
diff --git a/drivers/media/common/tuners/tda18271-fe.c b/drivers/media/common/tuners/tda18271-fe.c
index 9ad4454a148d..d884f5eee73c 100644
--- a/drivers/media/common/tuners/tda18271-fe.c
+++ b/drivers/media/common/tuners/tda18271-fe.c
@@ -579,8 +579,8 @@ static int tda18271_rf_tracking_filters_init(struct dvb_frontend *fe, u32 freq)
 #define RF3 2
 	u32 rf_default[3];
 	u32 rf_freq[3];
-	u8 prog_cal[3];
-	u8 prog_tab[3];
+	s32 prog_cal[3];
+	s32 prog_tab[3];
 
 	i = tda18271_lookup_rf_band(fe, &freq, NULL);
 
@@ -602,32 +602,33 @@ static int tda18271_rf_tracking_filters_init(struct dvb_frontend *fe, u32 freq)
 			return bcal;
 
 		tda18271_calc_rf_cal(fe, &rf_freq[rf]);
-		prog_tab[rf] = regs[R_EB14];
+		prog_tab[rf] = (s32)regs[R_EB14];
 
 		if (1 == bcal)
-			prog_cal[rf] = tda18271_calibrate_rf(fe, rf_freq[rf]);
+			prog_cal[rf] =
+				(s32)tda18271_calibrate_rf(fe, rf_freq[rf]);
 		else
 			prog_cal[rf] = prog_tab[rf];
 
 		switch (rf) {
 		case RF1:
 			map[i].rf_a1 = 0;
-			map[i].rf_b1 = (s32)(prog_cal[RF1] - prog_tab[RF1]);
+			map[i].rf_b1 = (prog_cal[RF1] - prog_tab[RF1]);
 			map[i].rf1   = rf_freq[RF1] / 1000;
 			break;
 		case RF2:
-			dividend = (s32)(prog_cal[RF2] - prog_tab[RF2]) -
-				   (s32)(prog_cal[RF1] + prog_tab[RF1]);
+			dividend = (prog_cal[RF2] - prog_tab[RF2] -
+				    prog_cal[RF1] + prog_tab[RF1]);
 			divisor = (s32)(rf_freq[RF2] - rf_freq[RF1]) / 1000;
 			map[i].rf_a1 = (dividend / divisor);
 			map[i].rf2   = rf_freq[RF2] / 1000;
 			break;
 		case RF3:
-			dividend = (s32)(prog_cal[RF3] - prog_tab[RF3]) -
-				   (s32)(prog_cal[RF2] + prog_tab[RF2]);
+			dividend = (prog_cal[RF3] - prog_tab[RF3] -
+				    prog_cal[RF2] + prog_tab[RF2]);
 			divisor = (s32)(rf_freq[RF3] - rf_freq[RF2]) / 1000;
 			map[i].rf_a2 = (dividend / divisor);
-			map[i].rf_b2 = (s32)(prog_cal[RF2] - prog_tab[RF2]);
+			map[i].rf_b2 = (prog_cal[RF2] - prog_tab[RF2]);
 			map[i].rf3   = rf_freq[RF3] / 1000;
 			break;
 		default:
diff --git a/drivers/media/common/tuners/tda18271-maps.c b/drivers/media/common/tuners/tda18271-maps.c
index e7f84c705da8..3d5b6ab7e332 100644
--- a/drivers/media/common/tuners/tda18271-maps.c
+++ b/drivers/media/common/tuners/tda18271-maps.c
@@ -229,8 +229,7 @@ static struct tda18271_map tda18271c2_km[] = {
 static struct tda18271_map tda18271_rf_band[] = {
 	{ .rfmax =  47900, .val = 0x00 },
 	{ .rfmax =  61100, .val = 0x01 },
-/*	{ .rfmax = 152600, .val = 0x02 }, */
-	{ .rfmax = 121200, .val = 0x02 },
+	{ .rfmax = 152600, .val = 0x02 },
 	{ .rfmax = 164700, .val = 0x03 },
 	{ .rfmax = 203500, .val = 0x04 },
 	{ .rfmax = 457800, .val = 0x05 },
@@ -448,7 +447,7 @@ static struct tda18271_map tda18271c2_rf_cal[] = {
 	{ .rfmax = 150000, .val = 0xb0 },
 	{ .rfmax = 151000, .val = 0xb1 },
 	{ .rfmax = 152000, .val = 0xb7 },
-	{ .rfmax = 153000, .val = 0xbd },
+	{ .rfmax = 152600, .val = 0xbd },
 	{ .rfmax = 154000, .val = 0x20 },
 	{ .rfmax = 155000, .val = 0x22 },
 	{ .rfmax = 156000, .val = 0x24 },
@@ -459,7 +458,7 @@ static struct tda18271_map tda18271c2_rf_cal[] = {
 	{ .rfmax = 161000, .val = 0x2d },
 	{ .rfmax = 163000, .val = 0x2e },
 	{ .rfmax = 164000, .val = 0x2f },
-	{ .rfmax = 165000, .val = 0x30 },
+	{ .rfmax = 164700, .val = 0x30 },
 	{ .rfmax = 166000, .val = 0x11 },
 	{ .rfmax = 167000, .val = 0x12 },
 	{ .rfmax = 168000, .val = 0x13 },
@@ -510,7 +509,8 @@ static struct tda18271_map tda18271c2_rf_cal[] = {
 	{ .rfmax = 236000, .val = 0x1b },
 	{ .rfmax = 237000, .val = 0x1c },
 	{ .rfmax = 240000, .val = 0x1d },
-	{ .rfmax = 242000, .val = 0x1f },
+	{ .rfmax = 242000, .val = 0x1e },
+	{ .rfmax = 244000, .val = 0x1f },
 	{ .rfmax = 247000, .val = 0x20 },
 	{ .rfmax = 249000, .val = 0x21 },
 	{ .rfmax = 252000, .val = 0x22 },
@@ -624,7 +624,7 @@ static struct tda18271_map tda18271c2_rf_cal[] = {
 	{ .rfmax = 453000, .val = 0x93 },
 	{ .rfmax = 454000, .val = 0x94 },
 	{ .rfmax = 456000, .val = 0x96 },
-	{ .rfmax = 457000, .val = 0x98 },
+	{ .rfmax = 457800, .val = 0x98 },
 	{ .rfmax = 461000, .val = 0x11 },
 	{ .rfmax = 468000, .val = 0x12 },
 	{ .rfmax = 472000, .val = 0x13 },
diff --git a/drivers/media/dvb/b2c2/flexcop-pci.c b/drivers/media/dvb/b2c2/flexcop-pci.c
index 955254090a0e..03f96d6ca894 100644
--- a/drivers/media/dvb/b2c2/flexcop-pci.c
+++ b/drivers/media/dvb/b2c2/flexcop-pci.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(debug,
 	DEBSTATUS);
 
 #define DRIVER_VERSION "0.1"
-#define DRIVER_NAME "Technisat/B2C2 FlexCop II/IIb/III Digital TV PCI Driver"
+#define DRIVER_NAME "flexcop-pci"
 #define DRIVER_AUTHOR "Patrick Boettcher <patrick.boettcher@desy.de>"
 
 struct flexcop_pci {
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index fe4f894183ff..c545039287ad 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -356,13 +356,15 @@ config DVB_USB_LME2510
 	select DVB_TDA826X if !DVB_FE_CUSTOMISE
 	select DVB_STV0288 if !DVB_FE_CUSTOMISE
 	select DVB_IX2505V if !DVB_FE_CUSTOMISE
+	select DVB_STV0299 if !DVB_FE_CUSTOMISE
+	select DVB_PLL if !DVB_FE_CUSTOMISE
 	help
 	  Say Y here to support the LME DM04/QQBOX DVB-S USB2.0 .
 
 config DVB_USB_TECHNISAT_USB2
 	tristate "Technisat DVB-S/S2 USB2.0 support"
 	depends on DVB_USB
-	select DVB_STB0899 if !DVB_FE_CUSTOMISE
-	select DVB_STB6100 if !DVB_FE_CUSTOMISE
+	select DVB_STV090x if !DVB_FE_CUSTOMISE
+	select DVB_STV6110x if !DVB_FE_CUSTOMISE
 	help
 	  Say Y here to support the Technisat USB2 DVB-S/S2 device
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index 97af266d7f1d..65214af5cd74 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -2162,7 +2162,7 @@ struct dibx000_agc_config dib7090_agc_config[2] = {
 		.agc1_pt3       = 98,
 		.agc1_slope1    = 0,
 		.agc1_slope2    = 167,
-		.agc1_pt1       = 98,
+		.agc2_pt1       = 98,
 		.agc2_pt2       = 255,
 		.agc2_slope1    = 104,
 		.agc2_slope2    = 0,
@@ -2440,11 +2440,11 @@ static int tfe7090pvr_frontend0_attach(struct dvb_usb_adapter *adap)
 	dib0700_set_i2c_speed(adap->dev, 340);
 	adap->fe = dvb_attach(dib7000p_attach, &adap->dev->i2c_adap, 0x90, &tfe7090pvr_dib7000p_config[0]);
 
-	dib7090_slave_reset(adap->fe);
-
 	if (adap->fe == NULL)
 		return -ENODEV;
 
+	dib7090_slave_reset(adap->fe);
+
 	return 0;
 }
 
diff --git a/drivers/media/dvb/ngene/ngene-core.c b/drivers/media/dvb/ngene/ngene-core.c
index ccc2d1af49d4..6927c726ce35 100644
--- a/drivers/media/dvb/ngene/ngene-core.c
+++ b/drivers/media/dvb/ngene/ngene-core.c
@@ -1520,6 +1520,7 @@ static int init_channel(struct ngene_channel *chan)
 	if (dev->ci.en && (io & NGENE_IO_TSOUT)) {
 		dvb_ca_en50221_init(adapter, dev->ci.en, 0, 1);
 		set_transfer(chan, 1);
+		chan->dev->channel[2].DataFormatFlags = DF_SWAP32;
 		set_transfer(&chan->dev->channel[2], 1);
 		dvb_register_device(adapter, &chan->ci_dev,
 				    &ngene_dvbdev_ci, (void *) chan,
diff --git a/drivers/media/media-entity.c b/drivers/media/media-entity.c
index 23640ed44d85..056138f63c7d 100644
--- a/drivers/media/media-entity.c
+++ b/drivers/media/media-entity.c
@@ -378,7 +378,6 @@ EXPORT_SYMBOL_GPL(media_entity_create_link);
 
 static int __media_entity_setup_link_notify(struct media_link *link, u32 flags)
 {
-	const u32 mask = MEDIA_LNK_FL_ENABLED;
 	int ret;
 
 	/* Notify both entities. */
@@ -395,7 +394,7 @@ static int __media_entity_setup_link_notify(struct media_link *link, u32 flags)
 		return ret;
 	}
 
-	link->flags = (link->flags & ~mask) | (flags & mask);
+	link->flags = flags;
 	link->reverse->flags = link->flags;
 
 	return 0;
@@ -417,6 +416,7 @@ static int __media_entity_setup_link_notify(struct media_link *link, u32 flags)
  */
 int __media_entity_setup_link(struct media_link *link, u32 flags)
 {
+	const u32 mask = MEDIA_LNK_FL_ENABLED;
 	struct media_device *mdev;
 	struct media_entity *source, *sink;
 	int ret = -EBUSY;
@@ -424,6 +424,10 @@ int __media_entity_setup_link(struct media_link *link, u32 flags)
 	if (link == NULL)
 		return -EINVAL;
 
+	/* The non-modifiable link flags must not be modified. */
+	if ((link->flags & ~mask) != (flags & ~mask))
+		return -EINVAL;
+
 	if (link->flags & MEDIA_LNK_FL_IMMUTABLE)
 		return link->flags == flags ? 0 : -EINVAL;
 
diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c
index dc3f04c52d5e..87bad7678d92 100644
--- a/drivers/media/radio/radio-sf16fmr2.c
+++ b/drivers/media/radio/radio-sf16fmr2.c
@@ -170,7 +170,7 @@ static int fmr2_setfreq(struct fmr2 *dev)
 	return 0;
 }
 
-/* !!! not tested, in my card this does't work !!! */
+/* !!! not tested, in my card this doesn't work !!! */
 static int fmr2_setvolume(struct fmr2 *dev)
 {
 	int vol[16] = { 0x021, 0x084, 0x090, 0x104,
diff --git a/drivers/media/radio/saa7706h.c b/drivers/media/radio/saa7706h.c
index 585680ffbfb6..b1193dfc5087 100644
--- a/drivers/media/radio/saa7706h.c
+++ b/drivers/media/radio/saa7706h.c
@@ -376,7 +376,7 @@ static int __devinit saa7706h_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%02x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	state = kmalloc(sizeof(struct saa7706h_state), GFP_KERNEL);
+	state = kzalloc(sizeof(struct saa7706h_state), GFP_KERNEL);
 	if (state == NULL)
 		return -ENOMEM;
 	sd = &state->sd;
diff --git a/drivers/media/radio/tef6862.c b/drivers/media/radio/tef6862.c
index 7c0d77751f6e..0991e1973678 100644
--- a/drivers/media/radio/tef6862.c
+++ b/drivers/media/radio/tef6862.c
@@ -176,7 +176,7 @@ static int __devinit tef6862_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%02x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	state = kmalloc(sizeof(struct tef6862_state), GFP_KERNEL);
+	state = kzalloc(sizeof(struct tef6862_state), GFP_KERNEL);
 	if (state == NULL)
 		return -ENOMEM;
 	state->freq = TEF6862_LO_FREQ;
diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index ebd68edf5b24..8fc0f081b470 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -46,7 +46,7 @@
 #define MOD_AUTHOR	"Jarod Wilson <jarod@wilsonet.com>"
 #define MOD_DESC	"Driver for SoundGraph iMON MultiMedia IR/Display"
 #define MOD_NAME	"imon"
-#define MOD_VERSION	"0.9.2"
+#define MOD_VERSION	"0.9.3"
 
 #define DISPLAY_MINOR_BASE	144
 #define DEVICE_NAME	"lcd%d"
@@ -460,8 +460,9 @@ static int display_close(struct inode *inode, struct file *file)
 }
 
 /**
- * Sends a packet to the device -- this function must be called
- * with ictx->lock held.
+ * Sends a packet to the device. This function must be called with
+ * ictx->lock held: it unlocks and re-locks ictx->lock while waiting
+ * for tx to complete, which can deadlock if the lock is not held.
  */
 static int send_packet(struct imon_context *ictx)
 {
@@ -991,12 +992,21 @@ static void imon_touch_display_timeout(unsigned long data)
  * the iMON remotes, and those used by the Windows MCE remotes (which is
  * really just RC-6), but only one or the other at a time, as the signals
  * are decoded onboard the receiver.
+ *
+ * This function gets called in two different ways: one is from
+ * rc_register_device, for initial protocol selection/setup, and the other is
+ * via a userspace-initiated protocol change request, either by direct sysfs
+ * prodding or by something like ir-keytable. In the rc_register_device case,
+ * the imon context lock is already held, but when initiated from userspace,
+ * it is not, so we must acquire it prior to calling send_packet, which
+ * requires that the lock is held.
  */
 static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type)
 {
 	int retval;
 	struct imon_context *ictx = rc->priv;
 	struct device *dev = ictx->dev;
+	bool unlock = false;
 	unsigned char ir_proto_packet[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 };
 
@@ -1029,6 +1039,11 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type)
 
 	memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet));
 
+	if (!mutex_is_locked(&ictx->lock)) {
+		unlock = true;
+		mutex_lock(&ictx->lock);
+	}
+
 	retval = send_packet(ictx);
 	if (retval)
 		goto out;
@@ -1037,6 +1052,9 @@ static int imon_ir_change_protocol(struct rc_dev *rc, u64 rc_type)
 	ictx->pad_mouse = false;
 
 out:
+	if (unlock)
+		mutex_unlock(&ictx->lock);
+
 	return retval;
 }
 
@@ -2134,6 +2152,7 @@ static struct imon_context *imon_init_intf0(struct usb_interface *intf)
 		goto rdev_setup_failed;
 	}
 
+	mutex_unlock(&ictx->lock);
 	return ictx;
 
 rdev_setup_failed:
@@ -2205,6 +2224,7 @@ static struct imon_context *imon_init_intf1(struct usb_interface *intf,
 		goto urb_submit_failed;
 	}
 
+	mutex_unlock(&ictx->lock);
 	return ictx;
 
 urb_submit_failed:
@@ -2299,6 +2319,8 @@ static int __devinit imon_probe(struct usb_interface *interface,
 	usb_set_intfdata(interface, ictx);
 
 	if (ifnum == 0) {
+		mutex_lock(&ictx->lock);
+
 		if (product == 0xffdc && ictx->rf_device) {
 			sysfs_err = sysfs_create_group(&interface->dev.kobj,
 						       &imon_rf_attr_group);
@@ -2309,13 +2331,14 @@ static int __devinit imon_probe(struct usb_interface *interface,
 
 		if (ictx->display_supported)
 			imon_init_display(ictx, interface);
+
+		mutex_unlock(&ictx->lock);
 	}
 
 	dev_info(dev, "iMON device (%04x:%04x, intf%d) on "
 		 "usb<%d:%d> initialized\n", vendor, product, ifnum,
 		 usbdev->bus->busnum, usbdev->devnum);
 
-	mutex_unlock(&ictx->lock);
 	mutex_unlock(&driver_lock);
 
 	return 0;
diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c
index accaf6c9789a..43908a70bd8b 100644
--- a/drivers/media/rc/ite-cir.c
+++ b/drivers/media/rc/ite-cir.c
@@ -36,6 +36,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/input.h>
 #include <linux/bitops.h>
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 044fb7a382d6..0c273ec465c9 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -220,6 +220,8 @@ static struct usb_device_id mceusb_dev_table[] = {
 	{ USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
 	/* Philips/Spinel plus IR transceiver for ASUS */
 	{ USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
+	/* Philips IR transceiver (Dell branded) */
+	{ USB_DEVICE(VENDOR_PHILIPS, 0x2093) },
 	/* Realtek MCE IR Receiver and card reader */
 	{ USB_DEVICE(VENDOR_REALTEK, 0x0161),
 	  .driver_info = MULTIFUNCTION },
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index f53f9c68d38d..a2706648e365 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -707,7 +707,8 @@ static void ir_close(struct input_dev *idev)
 {
 	struct rc_dev *rdev = input_get_drvdata(idev);
 
-	rdev->close(rdev);
+	 if (rdev)
+		rdev->close(rdev);
 }
 
 /* class for /sys/class/rc */
@@ -733,6 +734,7 @@ static struct {
 	{ RC_TYPE_SONY,		"sony"		},
 	{ RC_TYPE_RC5_SZ,	"rc-5-sz"	},
 	{ RC_TYPE_LIRC,		"lirc"		},
+	{ RC_TYPE_OTHER,	"other"		},
 };
 
 #define PROTO_NONE	"none"
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 4498b944dec8..00f51dd121f3 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -875,7 +875,7 @@ config MX3_VIDEO
 config VIDEO_MX3
 	tristate "i.MX3x Camera Sensor Interface driver"
 	depends on VIDEO_DEV && MX3_IPU && SOC_CAMERA
-	select VIDEOBUF_DMA_CONTIG
+	select VIDEOBUF2_DMA_CONTIG
 	select MX3_VIDEO
 	---help---
 	  This is a v4l2 driver for the i.MX3x Camera Sensor Interface
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index c6e2ca3b1149..6fbc356113c1 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -350,9 +350,17 @@ void cx18_streams_cleanup(struct cx18 *cx, int unregister)
 
 		/* No struct video_device, but can have buffers allocated */
 		if (type == CX18_ENC_STREAM_TYPE_IDX) {
+			/* If the module params didn't inhibit IDX ... */
 			if (cx->stream_buffers[type] != 0) {
 				cx->stream_buffers[type] = 0;
-				cx18_stream_free(&cx->streams[type]);
+				/*
+				 * Before calling cx18_stream_free(),
+				 * check if the IDX stream was actually set up.
+				 * Needed because the cx18_probe() error path
+				 * exits through here, as does normal cleanup.
+				 */
+				if (cx->streams[type].buffers != 0)
+					cx18_stream_free(&cx->streams[type]);
 			}
 			continue;
 		}
diff --git a/drivers/media/video/cx23885/Kconfig b/drivers/media/video/cx23885/Kconfig
index 3b6e7f28568e..caab1bfb79e2 100644
--- a/drivers/media/video/cx23885/Kconfig
+++ b/drivers/media/video/cx23885/Kconfig
@@ -22,6 +22,7 @@ config VIDEO_CX23885
 	select DVB_CX24116 if !DVB_FE_CUSTOMISE
 	select DVB_STV0900 if !DVB_FE_CUSTOMISE
 	select DVB_DS3000 if !DVB_FE_CUSTOMISE
+	select DVB_STV0367 if !DVB_FE_CUSTOMISE
 	select MEDIA_TUNER_MT2131 if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_TDA8290 if !MEDIA_TUNER_CUSTOMISE
diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c
index c820e2f53527..3f442003623d 100644
--- a/drivers/media/video/cx88/cx88-input.c
+++ b/drivers/media/video/cx88/cx88-input.c
@@ -524,7 +524,7 @@ void cx88_ir_irq(struct cx88_core *core)
 	for (todo = 32; todo > 0; todo -= bits) {
 		ev.pulse = samples & 0x80000000 ? false : true;
 		bits = min(todo, 32U - fls(ev.pulse ? samples : ~samples));
-		ev.duration = (bits * NSEC_PER_SEC) / (1000 * ir_samplerate);
+		ev.duration = (bits * (NSEC_PER_SEC / 1000)) / ir_samplerate;
 		ir_raw_event_store_with_filter(ir->dev, &ev);
 		samples <<= bits;
 	}
diff --git a/drivers/media/video/imx074.c b/drivers/media/video/imx074.c
index 1a1169115716..0382ea752e6f 100644
--- a/drivers/media/video/imx074.c
+++ b/drivers/media/video/imx074.c
@@ -298,7 +298,7 @@ static unsigned long imx074_query_bus_param(struct soc_camera_device *icd)
 static int imx074_set_bus_param(struct soc_camera_device *icd,
 				 unsigned long flags)
 {
-	return -1;
+	return -EINVAL;
 }
 
 static struct soc_camera_ops imx074_ops = {
diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c
index 5e1c9a81984c..303ffa7df4ac 100644
--- a/drivers/media/video/m52790.c
+++ b/drivers/media/video/m52790.c
@@ -174,7 +174,7 @@ static int m52790_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	state = kmalloc(sizeof(struct m52790_state), GFP_KERNEL);
+	state = kzalloc(sizeof(struct m52790_state), GFP_KERNEL);
 	if (state == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/media/video/omap3isp/isp.c b/drivers/media/video/omap3isp/isp.c
index 503bd7922bd6..472a69359e60 100644
--- a/drivers/media/video/omap3isp/isp.c
+++ b/drivers/media/video/omap3isp/isp.c
@@ -215,20 +215,21 @@ static u32 isp_set_xclk(struct isp_device *isp, u32 xclk, u8 xclksel)
 	}
 
 	switch (xclksel) {
-	case 0:
+	case ISP_XCLK_A:
 		isp_reg_clr_set(isp, OMAP3_ISP_IOMEM_MAIN, ISP_TCTRL_CTRL,
 				ISPTCTRL_CTRL_DIVA_MASK,
 				divisor << ISPTCTRL_CTRL_DIVA_SHIFT);
 		dev_dbg(isp->dev, "isp_set_xclk(): cam_xclka set to %d Hz\n",
 			currentxclk);
 		break;
-	case 1:
+	case ISP_XCLK_B:
 		isp_reg_clr_set(isp, OMAP3_ISP_IOMEM_MAIN, ISP_TCTRL_CTRL,
 				ISPTCTRL_CTRL_DIVB_MASK,
 				divisor << ISPTCTRL_CTRL_DIVB_SHIFT);
 		dev_dbg(isp->dev, "isp_set_xclk(): cam_xclkb set to %d Hz\n",
 			currentxclk);
 		break;
+	case ISP_XCLK_NONE:
 	default:
 		omap3isp_put(isp);
 		dev_dbg(isp->dev, "ISP_ERR: isp_set_xclk(): Invalid requested "
@@ -237,13 +238,13 @@ static u32 isp_set_xclk(struct isp_device *isp, u32 xclk, u8 xclksel)
 	}
 
 	/* Do we go from stable whatever to clock? */
-	if (divisor >= 2 && isp->xclk_divisor[xclksel] < 2)
+	if (divisor >= 2 && isp->xclk_divisor[xclksel - 1] < 2)
 		omap3isp_get(isp);
 	/* Stopping the clock. */
-	else if (divisor < 2 && isp->xclk_divisor[xclksel] >= 2)
+	else if (divisor < 2 && isp->xclk_divisor[xclksel - 1] >= 2)
 		omap3isp_put(isp);
 
-	isp->xclk_divisor[xclksel] = divisor;
+	isp->xclk_divisor[xclksel - 1] = divisor;
 
 	omap3isp_put(isp);
 
@@ -285,7 +286,8 @@ static void isp_power_settings(struct isp_device *isp, int idle)
  */
 void omap3isp_configure_bridge(struct isp_device *isp,
 			       enum ccdc_input_entity input,
-			       const struct isp_parallel_platform_data *pdata)
+			       const struct isp_parallel_platform_data *pdata,
+			       unsigned int shift)
 {
 	u32 ispctrl_val;
 
@@ -298,9 +300,9 @@ void omap3isp_configure_bridge(struct isp_device *isp,
 	switch (input) {
 	case CCDC_INPUT_PARALLEL:
 		ispctrl_val |= ISPCTRL_PAR_SER_CLK_SEL_PARALLEL;
-		ispctrl_val |= pdata->data_lane_shift << ISPCTRL_SHIFT_SHIFT;
 		ispctrl_val |= pdata->clk_pol << ISPCTRL_PAR_CLK_POL_SHIFT;
 		ispctrl_val |= pdata->bridge << ISPCTRL_PAR_BRIDGE_SHIFT;
+		shift += pdata->data_lane_shift * 2;
 		break;
 
 	case CCDC_INPUT_CSI2A:
@@ -319,6 +321,8 @@ void omap3isp_configure_bridge(struct isp_device *isp,
 		return;
 	}
 
+	ispctrl_val |= ((shift/2) << ISPCTRL_SHIFT_SHIFT) & ISPCTRL_SHIFT_MASK;
+
 	ispctrl_val &= ~ISPCTRL_SYNC_DETECT_MASK;
 	ispctrl_val |= ISPCTRL_SYNC_DETECT_VSRISE;
 
@@ -658,6 +662,8 @@ int omap3isp_pipeline_pm_use(struct media_entity *entity, int use)
 
 	/* Apply power change to connected non-nodes. */
 	ret = isp_pipeline_pm_power(entity, change);
+	if (ret < 0)
+		entity->use_count -= change;
 
 	mutex_unlock(&entity->parent->graph_mutex);
 
@@ -872,6 +878,9 @@ static int isp_pipeline_disable(struct isp_pipeline *pipe)
 		}
 	}
 
+	if (failure < 0)
+		isp->needs_reset = true;
+
 	return failure;
 }
 
@@ -884,7 +893,8 @@ static int isp_pipeline_disable(struct isp_pipeline *pipe)
  * single-shot or continuous mode.
  *
  * Return 0 if successful, or the return value of the failed video::s_stream
- * operation otherwise.
+ * operation otherwise. The pipeline state is not updated when the operation
+ * fails, except when stopping the pipeline.
  */
 int omap3isp_pipeline_set_stream(struct isp_pipeline *pipe,
 				 enum isp_pipeline_stream_state state)
@@ -895,7 +905,9 @@ int omap3isp_pipeline_set_stream(struct isp_pipeline *pipe,
 		ret = isp_pipeline_disable(pipe);
 	else
 		ret = isp_pipeline_enable(pipe, state);
-	pipe->stream_state = state;
+
+	if (ret == 0 || state == ISP_PIPELINE_STREAM_STOPPED)
+		pipe->stream_state = state;
 
 	return ret;
 }
@@ -1481,6 +1493,10 @@ void omap3isp_put(struct isp_device *isp)
 	if (--isp->ref_count == 0) {
 		isp_disable_interrupts(isp);
 		isp_save_ctx(isp);
+		if (isp->needs_reset) {
+			isp_reset(isp);
+			isp->needs_reset = false;
+		}
 		isp_disable_clocks(isp);
 	}
 	mutex_unlock(&isp->isp_mutex);
diff --git a/drivers/media/video/omap3isp/isp.h b/drivers/media/video/omap3isp/isp.h
index cf5214e95a92..2620c405f5e4 100644
--- a/drivers/media/video/omap3isp/isp.h
+++ b/drivers/media/video/omap3isp/isp.h
@@ -132,7 +132,6 @@ struct isp_reg {
 
 /**
  * struct isp_parallel_platform_data - Parallel interface platform data
- * @width: Parallel bus width in bits (8, 10, 11 or 12)
  * @data_lane_shift: Data lane shifter
  *		0 - CAMEXT[13:0] -> CAM[13:0]
  *		1 - CAMEXT[13:2] -> CAM[11:0]
@@ -146,7 +145,6 @@ struct isp_reg {
  *		ISPCTRL_PAR_BRIDGE_BENDIAN - Big endian
  */
 struct isp_parallel_platform_data {
-	unsigned int width;
 	unsigned int data_lane_shift:2;
 	unsigned int clk_pol:1;
 	unsigned int bridge:4;
@@ -262,6 +260,7 @@ struct isp_device {
 	/* ISP Obj */
 	spinlock_t stat_lock;	/* common lock for statistic drivers */
 	struct mutex isp_mutex;	/* For handling ref_count field */
+	bool needs_reset;
 	int has_context;
 	int ref_count;
 	unsigned int autoidle;
@@ -311,11 +310,12 @@ int omap3isp_pipeline_set_stream(struct isp_pipeline *pipe,
 				 enum isp_pipeline_stream_state state);
 void omap3isp_configure_bridge(struct isp_device *isp,
 			       enum ccdc_input_entity input,
-			       const struct isp_parallel_platform_data *pdata);
+			       const struct isp_parallel_platform_data *pdata,
+			       unsigned int shift);
 
-#define ISP_XCLK_NONE			-1
-#define ISP_XCLK_A			0
-#define ISP_XCLK_B			1
+#define ISP_XCLK_NONE			0
+#define ISP_XCLK_A			1
+#define ISP_XCLK_B			2
 
 struct isp_device *omap3isp_get(struct isp_device *isp);
 void omap3isp_put(struct isp_device *isp);
diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c
index 5ff9d14ce710..39d501bda636 100644
--- a/drivers/media/video/omap3isp/ispccdc.c
+++ b/drivers/media/video/omap3isp/ispccdc.c
@@ -43,6 +43,12 @@ __ccdc_get_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_fh *fh,
 
 static const unsigned int ccdc_fmts[] = {
 	V4L2_MBUS_FMT_Y8_1X8,
+	V4L2_MBUS_FMT_Y10_1X10,
+	V4L2_MBUS_FMT_Y12_1X12,
+	V4L2_MBUS_FMT_SGRBG8_1X8,
+	V4L2_MBUS_FMT_SRGGB8_1X8,
+	V4L2_MBUS_FMT_SBGGR8_1X8,
+	V4L2_MBUS_FMT_SGBRG8_1X8,
 	V4L2_MBUS_FMT_SGRBG10_1X10,
 	V4L2_MBUS_FMT_SRGGB10_1X10,
 	V4L2_MBUS_FMT_SBGGR10_1X10,
@@ -1110,21 +1116,38 @@ static void ccdc_configure(struct isp_ccdc_device *ccdc)
 	struct isp_parallel_platform_data *pdata = NULL;
 	struct v4l2_subdev *sensor;
 	struct v4l2_mbus_framefmt *format;
+	const struct isp_format_info *fmt_info;
+	struct v4l2_subdev_format fmt_src;
+	unsigned int depth_out;
+	unsigned int depth_in = 0;
 	struct media_pad *pad;
 	unsigned long flags;
+	unsigned int shift;
 	u32 syn_mode;
 	u32 ccdc_pattern;
 
-	if (ccdc->input == CCDC_INPUT_PARALLEL) {
-		pad = media_entity_remote_source(&ccdc->pads[CCDC_PAD_SINK]);
-		sensor = media_entity_to_v4l2_subdev(pad->entity);
+	pad = media_entity_remote_source(&ccdc->pads[CCDC_PAD_SINK]);
+	sensor = media_entity_to_v4l2_subdev(pad->entity);
+	if (ccdc->input == CCDC_INPUT_PARALLEL)
 		pdata = &((struct isp_v4l2_subdevs_group *)sensor->host_priv)
 			->bus.parallel;
+
+	/* Compute shift value for lane shifter to configure the bridge. */
+	fmt_src.pad = pad->index;
+	fmt_src.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+	if (!v4l2_subdev_call(sensor, pad, get_fmt, NULL, &fmt_src)) {
+		fmt_info = omap3isp_video_format_info(fmt_src.format.code);
+		depth_in = fmt_info->bpp;
 	}
 
-	omap3isp_configure_bridge(isp, ccdc->input, pdata);
+	fmt_info = omap3isp_video_format_info
+		(isp->isp_ccdc.formats[CCDC_PAD_SINK].code);
+	depth_out = fmt_info->bpp;
+
+	shift = depth_in - depth_out;
+	omap3isp_configure_bridge(isp, ccdc->input, pdata, shift);
 
-	ccdc->syncif.datsz = pdata ? pdata->width : 10;
+	ccdc->syncif.datsz = depth_out;
 	ccdc_config_sync_if(ccdc, &ccdc->syncif);
 
 	/* CCDC_PAD_SINK */
@@ -1338,7 +1361,7 @@ static int ccdc_sbl_wait_idle(struct isp_ccdc_device *ccdc,
  * @ccdc: Pointer to ISP CCDC device.
  * @event: Pointing which event trigger handler
  *
- * Return 1 when the event and stopping request combination is satisfyied,
+ * Return 1 when the event and stopping request combination is satisfied,
  * zero otherwise.
  */
 static int __ccdc_handle_stopping(struct isp_ccdc_device *ccdc, u32 event)
@@ -1618,7 +1641,7 @@ static int ccdc_video_queue(struct isp_video *video, struct isp_buffer *buffer)
 
 	ccdc_set_outaddr(ccdc, buffer->isp_addr);
 
-	/* We now have a buffer queued on the output, restart the pipeline in
+	/* We now have a buffer queued on the output, restart the pipeline
 	 * on the next CCDC interrupt if running in continuous mode (or when
 	 * starting the stream).
 	 */
diff --git a/drivers/media/video/omap3isp/isppreview.c b/drivers/media/video/omap3isp/isppreview.c
index 2b16988a501d..aba537af87e4 100644
--- a/drivers/media/video/omap3isp/isppreview.c
+++ b/drivers/media/video/omap3isp/isppreview.c
@@ -755,7 +755,7 @@ static struct preview_update update_attrs[] = {
  * @configs - pointer to update config structure.
  * @config - return pointer to appropriate structure field.
  * @bit - for which feature to return pointers.
- * Return size of coresponding prev_params member
+ * Return size of corresponding prev_params member
  */
 static u32
 __preview_get_ptrs(struct prev_params *params, void **param,
diff --git a/drivers/media/video/omap3isp/ispqueue.c b/drivers/media/video/omap3isp/ispqueue.c
index 8fddc5806b0d..9c317148205f 100644
--- a/drivers/media/video/omap3isp/ispqueue.c
+++ b/drivers/media/video/omap3isp/ispqueue.c
@@ -339,7 +339,7 @@ static int isp_video_buffer_prepare_user(struct isp_video_buffer *buf)
 	up_read(&current->mm->mmap_sem);
 
 	if (ret != buf->npages) {
-		buf->npages = ret;
+		buf->npages = ret < 0 ? 0 : ret;
 		isp_video_buffer_cleanup(buf);
 		return -EFAULT;
 	}
@@ -408,8 +408,8 @@ done:
  * isp_video_buffer_prepare_vm_flags - Get VMA flags for a userspace address
  *
  * This function locates the VMAs for the buffer's userspace address and checks
- * that their flags match. The onlflag that we need to care for at the moment is
- * VM_PFNMAP.
+ * that their flags match. The only flag that we need to care for at the moment
+ * is VM_PFNMAP.
  *
  * The buffer vm_flags field is set to the first VMA flags.
  *
diff --git a/drivers/media/video/omap3isp/ispresizer.c b/drivers/media/video/omap3isp/ispresizer.c
index 653f88ba56db..0bb0f8cd36f5 100644
--- a/drivers/media/video/omap3isp/ispresizer.c
+++ b/drivers/media/video/omap3isp/ispresizer.c
@@ -714,19 +714,50 @@ static void resizer_print_status(struct isp_res_device *res)
  * iw and ih are the input width and height after cropping. Those equations need
  * to be satisfied exactly for the resizer to work correctly.
  *
- * Reverting the equations, we can compute the resizing ratios with
+ * The equations can't be easily reverted, as the >> 8 operation is not linear.
+ * In addition, not all input sizes can be achieved for a given output size. To
+ * get the highest input size lower than or equal to the requested input size,
+ * we need to compute the highest resizing ratio that satisfies the following
+ * inequality (taking the 4-tap mode width equation as an example)
+ *
+ *	iw >= ((32 * sph + (ow - 1) * hrsz + 16) >> 8) + 7
+ *
+ * (where iw is the requested input width) which can be rewritten as
+ *
+ *	  iw - 7            >= (32 * sph + (ow - 1) * hrsz + 16) >> 8
+ *	 (iw - 7) << 8      >=  32 * sph + (ow - 1) * hrsz + 16 - b
+ *	((iw - 7) << 8) + b >=  32 * sph + (ow - 1) * hrsz + 16
+ *
+ * where b is the value of the 8 least significant bits of the right hand side
+ * expression of the last inequality. The highest resizing ratio value will be
+ * achieved when b is equal to its maximum value of 255. That resizing ratio
+ * value will still satisfy the original inequality, as b will disappear when
+ * the expression is shifted right by 8.
+ *
+ * The reverted equations thus become
  *
  * - 8-phase, 4-tap mode
- *	hrsz = ((iw - 7) * 256 - 16 - 32 * sph) / (ow - 1)
- *	vrsz = ((ih - 4) * 256 - 16 - 32 * spv) / (oh - 1)
+ *	hrsz = ((iw - 7) * 256 + 255 - 16 - 32 * sph) / (ow - 1)
+ *	vrsz = ((ih - 4) * 256 + 255 - 16 - 32 * spv) / (oh - 1)
  * - 4-phase, 7-tap mode
- *	hrsz = ((iw - 7) * 256 - 32 - 64 * sph) / (ow - 1)
- *	vrsz = ((ih - 7) * 256 - 32 - 64 * spv) / (oh - 1)
+ *	hrsz = ((iw - 7) * 256 + 255 - 32 - 64 * sph) / (ow - 1)
+ *	vrsz = ((ih - 7) * 256 + 255 - 32 - 64 * spv) / (oh - 1)
  *
- * The ratios are integer values, and must be rounded down to ensure that the
- * cropped input size is not bigger than the uncropped input size. As the ratio
- * in 7-tap mode is always smaller than the ratio in 4-tap mode, we can use the
- * 7-tap mode equations to compute a ratio approximation.
+ * The ratios are integer values, and are rounded down to ensure that the
+ * cropped input size is not bigger than the uncropped input size.
+ *
+ * As the number of phases/taps, used to select the correct equations to compute
+ * the ratio, depends on the ratio, we start with the 4-tap mode equations to
+ * compute an approximation of the ratio, and switch to the 7-tap mode equations
+ * if the approximation is higher than the ratio threshold.
+ *
+ * As the 7-tap mode equations will return a ratio smaller than or equal to the
+ * 4-tap mode equations, the resulting ratio could become lower than or equal to
+ * the ratio threshold. This 'equations loop' isn't an issue as long as the
+ * correct equations are used to compute the final input size. Starting with the
+ * 4-tap mode equations ensures that, in case of values resulting in a 'ratio
+ * loop', the smallest of the ratio values will be used, never exceeding the
+ * requested input size.
  *
  * We first clamp the output size according to the hardware capabilitie to avoid
  * auto-cropping the input more than required to satisfy the TRM equations. The
@@ -775,6 +806,8 @@ static void resizer_calc_ratios(struct isp_res_device *res,
 	unsigned int max_width;
 	unsigned int max_height;
 	unsigned int width_alignment;
+	unsigned int width;
+	unsigned int height;
 
 	/*
 	 * Clamp the output height based on the hardware capabilities and
@@ -786,19 +819,22 @@ static void resizer_calc_ratios(struct isp_res_device *res,
 	max_height = min_t(unsigned int, max_height, MAX_OUT_HEIGHT);
 	output->height = clamp(output->height, min_height, max_height);
 
-	ratio->vert = ((input->height - 7) * 256 - 32 - 64 * spv)
+	ratio->vert = ((input->height - 4) * 256 + 255 - 16 - 32 * spv)
 		    / (output->height - 1);
+	if (ratio->vert > MID_RESIZE_VALUE)
+		ratio->vert = ((input->height - 7) * 256 + 255 - 32 - 64 * spv)
+			    / (output->height - 1);
 	ratio->vert = clamp_t(unsigned int, ratio->vert,
 			      MIN_RESIZE_VALUE, MAX_RESIZE_VALUE);
 
 	if (ratio->vert <= MID_RESIZE_VALUE) {
 		upscaled_height = (output->height - 1) * ratio->vert
 				+ 32 * spv + 16;
-		input->height = (upscaled_height >> 8) + 4;
+		height = (upscaled_height >> 8) + 4;
 	} else {
 		upscaled_height = (output->height - 1) * ratio->vert
 				+ 64 * spv + 32;
-		input->height = (upscaled_height >> 8) + 7;
+		height = (upscaled_height >> 8) + 7;
 	}
 
 	/*
@@ -854,20 +890,29 @@ static void resizer_calc_ratios(struct isp_res_device *res,
 			      max_width & ~(width_alignment - 1));
 	output->width = ALIGN(output->width, width_alignment);
 
-	ratio->horz = ((input->width - 7) * 256 - 32 - 64 * sph)
+	ratio->horz = ((input->width - 7) * 256 + 255 - 16 - 32 * sph)
 		    / (output->width - 1);
+	if (ratio->horz > MID_RESIZE_VALUE)
+		ratio->horz = ((input->width - 7) * 256 + 255 - 32 - 64 * sph)
+			    / (output->width - 1);
 	ratio->horz = clamp_t(unsigned int, ratio->horz,
 			      MIN_RESIZE_VALUE, MAX_RESIZE_VALUE);
 
 	if (ratio->horz <= MID_RESIZE_VALUE) {
 		upscaled_width = (output->width - 1) * ratio->horz
 			       + 32 * sph + 16;
-		input->width = (upscaled_width >> 8) + 7;
+		width = (upscaled_width >> 8) + 7;
 	} else {
 		upscaled_width = (output->width - 1) * ratio->horz
 			       + 64 * sph + 32;
-		input->width = (upscaled_width >> 8) + 7;
+		width = (upscaled_width >> 8) + 7;
 	}
+
+	/* Center the new crop rectangle. */
+	input->left += (input->width - width) / 2;
+	input->top += (input->height - height) / 2;
+	input->width = width;
+	input->height = height;
 }
 
 /*
diff --git a/drivers/media/video/omap3isp/ispstat.h b/drivers/media/video/omap3isp/ispstat.h
index 820950c9ef46..d86da94fa50d 100644
--- a/drivers/media/video/omap3isp/ispstat.h
+++ b/drivers/media/video/omap3isp/ispstat.h
@@ -131,9 +131,9 @@ struct ispstat {
 struct ispstat_generic_config {
 	/*
 	 * Fields must be in the same order as in:
-	 *  - isph3a_aewb_config
-	 *  - isph3a_af_config
-	 *  - isphist_config
+	 *  - omap3isp_h3a_aewb_config
+	 *  - omap3isp_h3a_af_config
+	 *  - omap3isp_hist_config
 	 */
 	u32 buf_size;
 	u16 config_counter;
diff --git a/drivers/media/video/omap3isp/ispvideo.c b/drivers/media/video/omap3isp/ispvideo.c
index 208a7ec739d7..9cd8f1aa567b 100644
--- a/drivers/media/video/omap3isp/ispvideo.c
+++ b/drivers/media/video/omap3isp/ispvideo.c
@@ -47,29 +47,59 @@
 
 static struct isp_format_info formats[] = {
 	{ V4L2_MBUS_FMT_Y8_1X8, V4L2_MBUS_FMT_Y8_1X8,
-	  V4L2_MBUS_FMT_Y8_1X8, V4L2_PIX_FMT_GREY, 8, },
+	  V4L2_MBUS_FMT_Y8_1X8, V4L2_MBUS_FMT_Y8_1X8,
+	  V4L2_PIX_FMT_GREY, 8, },
+	{ V4L2_MBUS_FMT_Y10_1X10, V4L2_MBUS_FMT_Y10_1X10,
+	  V4L2_MBUS_FMT_Y10_1X10, V4L2_MBUS_FMT_Y8_1X8,
+	  V4L2_PIX_FMT_Y10, 10, },
+	{ V4L2_MBUS_FMT_Y12_1X12, V4L2_MBUS_FMT_Y10_1X10,
+	  V4L2_MBUS_FMT_Y12_1X12, V4L2_MBUS_FMT_Y8_1X8,
+	  V4L2_PIX_FMT_Y12, 12, },
+	{ V4L2_MBUS_FMT_SBGGR8_1X8, V4L2_MBUS_FMT_SBGGR8_1X8,
+	  V4L2_MBUS_FMT_SBGGR8_1X8, V4L2_MBUS_FMT_SBGGR8_1X8,
+	  V4L2_PIX_FMT_SBGGR8, 8, },
+	{ V4L2_MBUS_FMT_SGBRG8_1X8, V4L2_MBUS_FMT_SGBRG8_1X8,
+	  V4L2_MBUS_FMT_SGBRG8_1X8, V4L2_MBUS_FMT_SGBRG8_1X8,
+	  V4L2_PIX_FMT_SGBRG8, 8, },
+	{ V4L2_MBUS_FMT_SGRBG8_1X8, V4L2_MBUS_FMT_SGRBG8_1X8,
+	  V4L2_MBUS_FMT_SGRBG8_1X8, V4L2_MBUS_FMT_SGRBG8_1X8,
+	  V4L2_PIX_FMT_SGRBG8, 8, },
+	{ V4L2_MBUS_FMT_SRGGB8_1X8, V4L2_MBUS_FMT_SRGGB8_1X8,
+	  V4L2_MBUS_FMT_SRGGB8_1X8, V4L2_MBUS_FMT_SRGGB8_1X8,
+	  V4L2_PIX_FMT_SRGGB8, 8, },
 	{ V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8,
-	  V4L2_MBUS_FMT_SGRBG10_1X10, V4L2_PIX_FMT_SGRBG10DPCM8, 8, },
+	  V4L2_MBUS_FMT_SGRBG10_1X10, 0,
+	  V4L2_PIX_FMT_SGRBG10DPCM8, 8, },
 	{ V4L2_MBUS_FMT_SBGGR10_1X10, V4L2_MBUS_FMT_SBGGR10_1X10,
-	  V4L2_MBUS_FMT_SBGGR10_1X10, V4L2_PIX_FMT_SBGGR10, 10, },
+	  V4L2_MBUS_FMT_SBGGR10_1X10, V4L2_MBUS_FMT_SBGGR8_1X8,
+	  V4L2_PIX_FMT_SBGGR10, 10, },
 	{ V4L2_MBUS_FMT_SGBRG10_1X10, V4L2_MBUS_FMT_SGBRG10_1X10,
-	  V4L2_MBUS_FMT_SGBRG10_1X10, V4L2_PIX_FMT_SGBRG10, 10, },
+	  V4L2_MBUS_FMT_SGBRG10_1X10, V4L2_MBUS_FMT_SGBRG8_1X8,
+	  V4L2_PIX_FMT_SGBRG10, 10, },
 	{ V4L2_MBUS_FMT_SGRBG10_1X10, V4L2_MBUS_FMT_SGRBG10_1X10,
-	  V4L2_MBUS_FMT_SGRBG10_1X10, V4L2_PIX_FMT_SGRBG10, 10, },
+	  V4L2_MBUS_FMT_SGRBG10_1X10, V4L2_MBUS_FMT_SGRBG8_1X8,
+	  V4L2_PIX_FMT_SGRBG10, 10, },
 	{ V4L2_MBUS_FMT_SRGGB10_1X10, V4L2_MBUS_FMT_SRGGB10_1X10,
-	  V4L2_MBUS_FMT_SRGGB10_1X10, V4L2_PIX_FMT_SRGGB10, 10, },
+	  V4L2_MBUS_FMT_SRGGB10_1X10, V4L2_MBUS_FMT_SRGGB8_1X8,
+	  V4L2_PIX_FMT_SRGGB10, 10, },
 	{ V4L2_MBUS_FMT_SBGGR12_1X12, V4L2_MBUS_FMT_SBGGR10_1X10,
-	  V4L2_MBUS_FMT_SBGGR12_1X12, V4L2_PIX_FMT_SBGGR12, 12, },
+	  V4L2_MBUS_FMT_SBGGR12_1X12, V4L2_MBUS_FMT_SBGGR8_1X8,
+	  V4L2_PIX_FMT_SBGGR12, 12, },
 	{ V4L2_MBUS_FMT_SGBRG12_1X12, V4L2_MBUS_FMT_SGBRG10_1X10,
-	  V4L2_MBUS_FMT_SGBRG12_1X12, V4L2_PIX_FMT_SGBRG12, 12, },
+	  V4L2_MBUS_FMT_SGBRG12_1X12, V4L2_MBUS_FMT_SGBRG8_1X8,
+	  V4L2_PIX_FMT_SGBRG12, 12, },
 	{ V4L2_MBUS_FMT_SGRBG12_1X12, V4L2_MBUS_FMT_SGRBG10_1X10,
-	  V4L2_MBUS_FMT_SGRBG12_1X12, V4L2_PIX_FMT_SGRBG12, 12, },
+	  V4L2_MBUS_FMT_SGRBG12_1X12, V4L2_MBUS_FMT_SGRBG8_1X8,
+	  V4L2_PIX_FMT_SGRBG12, 12, },
 	{ V4L2_MBUS_FMT_SRGGB12_1X12, V4L2_MBUS_FMT_SRGGB10_1X10,
-	  V4L2_MBUS_FMT_SRGGB12_1X12, V4L2_PIX_FMT_SRGGB12, 12, },
+	  V4L2_MBUS_FMT_SRGGB12_1X12, V4L2_MBUS_FMT_SRGGB8_1X8,
+	  V4L2_PIX_FMT_SRGGB12, 12, },
 	{ V4L2_MBUS_FMT_UYVY8_1X16, V4L2_MBUS_FMT_UYVY8_1X16,
-	  V4L2_MBUS_FMT_UYVY8_1X16, V4L2_PIX_FMT_UYVY, 16, },
+	  V4L2_MBUS_FMT_UYVY8_1X16, 0,
+	  V4L2_PIX_FMT_UYVY, 16, },
 	{ V4L2_MBUS_FMT_YUYV8_1X16, V4L2_MBUS_FMT_YUYV8_1X16,
-	  V4L2_MBUS_FMT_YUYV8_1X16, V4L2_PIX_FMT_YUYV, 16, },
+	  V4L2_MBUS_FMT_YUYV8_1X16, 0,
+	  V4L2_PIX_FMT_YUYV, 16, },
 };
 
 const struct isp_format_info *
@@ -86,6 +116,37 @@ omap3isp_video_format_info(enum v4l2_mbus_pixelcode code)
 }
 
 /*
+ * Decide whether desired output pixel code can be obtained with
+ * the lane shifter by shifting the input pixel code.
+ * @in: input pixelcode to shifter
+ * @out: output pixelcode from shifter
+ * @additional_shift: # of bits the sensor's LSB is offset from CAMEXT[0]
+ *
+ * return true if the combination is possible
+ * return false otherwise
+ */
+static bool isp_video_is_shiftable(enum v4l2_mbus_pixelcode in,
+		enum v4l2_mbus_pixelcode out,
+		unsigned int additional_shift)
+{
+	const struct isp_format_info *in_info, *out_info;
+
+	if (in == out)
+		return true;
+
+	in_info = omap3isp_video_format_info(in);
+	out_info = omap3isp_video_format_info(out);
+
+	if ((in_info->flavor == 0) || (out_info->flavor == 0))
+		return false;
+
+	if (in_info->flavor != out_info->flavor)
+		return false;
+
+	return in_info->bpp - out_info->bpp + additional_shift <= 6;
+}
+
+/*
  * isp_video_mbus_to_pix - Convert v4l2_mbus_framefmt to v4l2_pix_format
  * @video: ISP video instance
  * @mbus: v4l2_mbus_framefmt format (input)
@@ -235,6 +296,7 @@ static int isp_video_validate_pipeline(struct isp_pipeline *pipe)
 		return -EPIPE;
 
 	while (1) {
+		unsigned int shifter_link;
 		/* Retrieve the sink format */
 		pad = &subdev->entity.pads[0];
 		if (!(pad->flags & MEDIA_PAD_FL_SINK))
@@ -263,6 +325,10 @@ static int isp_video_validate_pipeline(struct isp_pipeline *pipe)
 				return -ENOSPC;
 		}
 
+		/* If sink pad is on CCDC, the link has the lane shifter
+		 * in the middle of it. */
+		shifter_link = subdev == &isp->isp_ccdc.subdev;
+
 		/* Retrieve the source format */
 		pad = media_entity_remote_source(pad);
 		if (pad == NULL ||
@@ -278,10 +344,24 @@ static int isp_video_validate_pipeline(struct isp_pipeline *pipe)
 			return -EPIPE;
 
 		/* Check if the two ends match */
-		if (fmt_source.format.code != fmt_sink.format.code ||
-		    fmt_source.format.width != fmt_sink.format.width ||
+		if (fmt_source.format.width != fmt_sink.format.width ||
 		    fmt_source.format.height != fmt_sink.format.height)
 			return -EPIPE;
+
+		if (shifter_link) {
+			unsigned int parallel_shift = 0;
+			if (isp->isp_ccdc.input == CCDC_INPUT_PARALLEL) {
+				struct isp_parallel_platform_data *pdata =
+					&((struct isp_v4l2_subdevs_group *)
+					      subdev->host_priv)->bus.parallel;
+				parallel_shift = pdata->data_lane_shift * 2;
+			}
+			if (!isp_video_is_shiftable(fmt_source.format.code,
+						fmt_sink.format.code,
+						parallel_shift))
+				return -EPIPE;
+		} else if (fmt_source.format.code != fmt_sink.format.code)
+			return -EPIPE;
 	}
 
 	return 0;
diff --git a/drivers/media/video/omap3isp/ispvideo.h b/drivers/media/video/omap3isp/ispvideo.h
index 524a1acd0906..911bea64e78a 100644
--- a/drivers/media/video/omap3isp/ispvideo.h
+++ b/drivers/media/video/omap3isp/ispvideo.h
@@ -49,6 +49,8 @@ struct v4l2_pix_format;
  *	bits. Identical to @code if the format is 10 bits wide or less.
  * @uncompressed: V4L2 media bus format code for the corresponding uncompressed
  *	format. Identical to @code if the format is not DPCM compressed.
+ * @flavor: V4L2 media bus format code for the same pixel layout but
+ *	shifted to be 8 bits per pixel. =0 if format is not shiftable.
  * @pixelformat: V4L2 pixel format FCC identifier
  * @bpp: Bits per pixel
  */
@@ -56,6 +58,7 @@ struct isp_format_info {
 	enum v4l2_mbus_pixelcode code;
 	enum v4l2_mbus_pixelcode truncated;
 	enum v4l2_mbus_pixelcode uncompressed;
+	enum v4l2_mbus_pixelcode flavor;
 	u32 pixelformat;
 	unsigned int bpp;
 };
diff --git a/drivers/media/video/s5p-fimc/fimc-capture.c b/drivers/media/video/s5p-fimc/fimc-capture.c
index 95f8b4e11e46..d142b40ea64e 100644
--- a/drivers/media/video/s5p-fimc/fimc-capture.c
+++ b/drivers/media/video/s5p-fimc/fimc-capture.c
@@ -527,7 +527,7 @@ static int fimc_cap_s_fmt_mplane(struct file *file, void *priv,
 	if (ret)
 		return ret;
 
-	if (vb2_is_streaming(&fimc->vid_cap.vbq) || fimc_capture_active(fimc))
+	if (vb2_is_busy(&fimc->vid_cap.vbq) || fimc_capture_active(fimc))
 		return -EBUSY;
 
 	frame = &ctx->d_frame;
@@ -539,8 +539,10 @@ static int fimc_cap_s_fmt_mplane(struct file *file, void *priv,
 		return -EINVAL;
 	}
 
-	for (i = 0; i < frame->fmt->colplanes; i++)
-		frame->payload[i] = pix->plane_fmt[i].bytesperline * pix->height;
+	for (i = 0; i < frame->fmt->colplanes; i++) {
+		frame->payload[i] =
+			(pix->width * pix->height * frame->fmt->depth[i]) >> 3;
+	}
 
 	/* Output DMA frame pixel size and offsets. */
 	frame->f_width = pix->plane_fmt[0].bytesperline * 8
diff --git a/drivers/media/video/s5p-fimc/fimc-core.c b/drivers/media/video/s5p-fimc/fimc-core.c
index 6c919b38a3d8..dc91a8511af6 100644
--- a/drivers/media/video/s5p-fimc/fimc-core.c
+++ b/drivers/media/video/s5p-fimc/fimc-core.c
@@ -361,10 +361,20 @@ static void fimc_capture_irq_handler(struct fimc_dev *fimc)
 {
 	struct fimc_vid_cap *cap = &fimc->vid_cap;
 	struct fimc_vid_buffer *v_buf;
+	struct timeval *tv;
+	struct timespec ts;
 
 	if (!list_empty(&cap->active_buf_q) &&
 	    test_bit(ST_CAPT_RUN, &fimc->state)) {
+		ktime_get_real_ts(&ts);
+
 		v_buf = active_queue_pop(cap);
+
+		tv = &v_buf->vb.v4l2_buf.timestamp;
+		tv->tv_sec = ts.tv_sec;
+		tv->tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+		v_buf->vb.v4l2_buf.sequence = cap->frame_count++;
+
 		vb2_buffer_done(&v_buf->vb, VB2_BUF_STATE_DONE);
 	}
 
@@ -758,7 +768,7 @@ static void fimc_unlock(struct vb2_queue *vq)
 	mutex_unlock(&ctx->fimc_dev->lock);
 }
 
-struct vb2_ops fimc_qops = {
+static struct vb2_ops fimc_qops = {
 	.queue_setup	 = fimc_queue_setup,
 	.buf_prepare	 = fimc_buf_prepare,
 	.buf_queue	 = fimc_buf_queue,
@@ -927,23 +937,23 @@ int fimc_vidioc_try_fmt_mplane(struct file *file, void *priv,
 	pix->num_planes = fmt->memplanes;
 	pix->colorspace	= V4L2_COLORSPACE_JPEG;
 
-	for (i = 0; i < pix->num_planes; ++i) {
-		int bpl = pix->plane_fmt[i].bytesperline;
 
-		dbg("[%d] bpl: %d, depth: %d, w: %d, h: %d",
-		    i, bpl, fmt->depth[i], pix->width, pix->height);
+	for (i = 0; i < pix->num_planes; ++i) {
+		u32 bpl = pix->plane_fmt[i].bytesperline;
+		u32 *sizeimage = &pix->plane_fmt[i].sizeimage;
 
-		if (!bpl || (bpl * 8 / fmt->depth[i]) > pix->width)
-			bpl = (pix->width * fmt->depth[0]) >> 3;
+		if (fmt->colplanes > 1 && (bpl == 0 || bpl < pix->width))
+			bpl = pix->width; /* Planar */
 
-		if (!pix->plane_fmt[i].sizeimage)
-			pix->plane_fmt[i].sizeimage = pix->height * bpl;
+		if (fmt->colplanes == 1 && /* Packed */
+		    (bpl == 0 || ((bpl * 8) / fmt->depth[i]) < pix->width))
+			bpl = (pix->width * fmt->depth[0]) / 8;
 
-		pix->plane_fmt[i].bytesperline = bpl;
+		if (i == 0) /* Same bytesperline for each plane. */
+			mod_x = bpl;
 
-		dbg("[%d]: bpl: %d, sizeimage: %d",
-		    i, pix->plane_fmt[i].bytesperline,
-		    pix->plane_fmt[i].sizeimage);
+		pix->plane_fmt[i].bytesperline = mod_x;
+		*sizeimage = (pix->width * pix->height * fmt->depth[i]) / 8;
 	}
 
 	return 0;
@@ -965,7 +975,7 @@ static int fimc_m2m_s_fmt_mplane(struct file *file, void *priv,
 
 	vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type);
 
-	if (vb2_is_streaming(vq)) {
+	if (vb2_is_busy(vq)) {
 		v4l2_err(&fimc->m2m.v4l2_dev, "queue (%d) busy\n", f->type);
 		return -EBUSY;
 	}
@@ -985,8 +995,10 @@ static int fimc_m2m_s_fmt_mplane(struct file *file, void *priv,
 	if (!frame->fmt)
 		return -EINVAL;
 
-	for (i = 0; i < frame->fmt->colplanes; i++)
-		frame->payload[i] = pix->plane_fmt[i].bytesperline * pix->height;
+	for (i = 0; i < frame->fmt->colplanes; i++) {
+		frame->payload[i] =
+			(pix->width * pix->height * frame->fmt->depth[i]) / 8;
+	}
 
 	frame->f_width	= pix->plane_fmt[0].bytesperline * 8 /
 		frame->fmt->depth[0];
@@ -1750,7 +1762,7 @@ static int __devexit fimc_remove(struct platform_device *pdev)
 }
 
 /* Image pixel limits, similar across several FIMC HW revisions. */
-static struct fimc_pix_limit s5p_pix_limit[3] = {
+static struct fimc_pix_limit s5p_pix_limit[4] = {
 	[0] = {
 		.scaler_en_w	= 3264,
 		.scaler_dis_w	= 8192,
@@ -1775,6 +1787,14 @@ static struct fimc_pix_limit s5p_pix_limit[3] = {
 		.out_rot_en_w	= 1280,
 		.out_rot_dis_w	= 1920,
 	},
+	[3] = {
+		.scaler_en_w	= 1920,
+		.scaler_dis_w	= 8192,
+		.in_rot_en_h	= 1366,
+		.in_rot_dis_w	= 8192,
+		.out_rot_en_w	= 1366,
+		.out_rot_dis_w	= 1920,
+	},
 };
 
 static struct samsung_fimc_variant fimc0_variant_s5p = {
@@ -1827,7 +1847,7 @@ static struct samsung_fimc_variant fimc2_variant_s5pv210 = {
 	.pix_limit	 = &s5p_pix_limit[2],
 };
 
-static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
+static struct samsung_fimc_variant fimc0_variant_exynos4 = {
 	.pix_hoff	 = 1,
 	.has_inp_rot	 = 1,
 	.has_out_rot	 = 1,
@@ -1840,7 +1860,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
 	.pix_limit	 = &s5p_pix_limit[1],
 };
 
-static struct samsung_fimc_variant fimc2_variant_s5pv310 = {
+static struct samsung_fimc_variant fimc2_variant_exynos4 = {
 	.pix_hoff	 = 1,
 	.has_cistatus2	 = 1,
 	.has_mainscaler_ext = 1,
@@ -1848,7 +1868,7 @@ static struct samsung_fimc_variant fimc2_variant_s5pv310 = {
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 1,
 	.out_buf_count	 = 32,
-	.pix_limit	 = &s5p_pix_limit[2],
+	.pix_limit	 = &s5p_pix_limit[3],
 };
 
 /* S5PC100 */
@@ -1874,12 +1894,12 @@ static struct samsung_fimc_driverdata fimc_drvdata_s5pv210 = {
 };
 
 /* S5PV310, S5PC210 */
-static struct samsung_fimc_driverdata fimc_drvdata_s5pv310 = {
+static struct samsung_fimc_driverdata fimc_drvdata_exynos4 = {
 	.variant = {
-		[0] = &fimc0_variant_s5pv310,
-		[1] = &fimc0_variant_s5pv310,
-		[2] = &fimc0_variant_s5pv310,
-		[3] = &fimc2_variant_s5pv310,
+		[0] = &fimc0_variant_exynos4,
+		[1] = &fimc0_variant_exynos4,
+		[2] = &fimc0_variant_exynos4,
+		[3] = &fimc2_variant_exynos4,
 	},
 	.num_entities = 4,
 	.lclk_frequency = 166000000UL,
@@ -1893,8 +1913,8 @@ static struct platform_device_id fimc_driver_ids[] = {
 		.name		= "s5pv210-fimc",
 		.driver_data	= (unsigned long)&fimc_drvdata_s5pv210,
 	}, {
-		.name		= "s5pv310-fimc",
-		.driver_data	= (unsigned long)&fimc_drvdata_s5pv310,
+		.name		= "exynos4-fimc",
+		.driver_data	= (unsigned long)&fimc_drvdata_exynos4,
 	},
 	{},
 };
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 3fe54bf41142..134e86bf6d97 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -922,7 +922,7 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 			/* Try 2560x1920, 1280x960, 640x480, 320x240 */
 			mf.width	= 2560 >> shift;
 			mf.height	= 1920 >> shift;
-			ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+			ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
 							 s_mbus_fmt, &mf);
 			if (ret < 0)
 				return ret;
@@ -1224,7 +1224,7 @@ static int client_s_fmt(struct soc_camera_device *icd,
 	struct v4l2_cropcap cap;
 	int ret;
 
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
 					 s_mbus_fmt, mf);
 	if (ret < 0)
 		return ret;
@@ -1254,7 +1254,7 @@ static int client_s_fmt(struct soc_camera_device *icd,
 		tmp_h = min(2 * tmp_h, max_height);
 		mf->width = tmp_w;
 		mf->height = tmp_h;
-		ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+		ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
 						 s_mbus_fmt, mf);
 		dev_geo(dev, "Camera scaled to %ux%u\n",
 			mf->width, mf->height);
@@ -1658,7 +1658,7 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 	mf.code		= xlate->code;
 	mf.colorspace	= pix->colorspace;
 
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video, try_mbus_fmt, &mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video, try_mbus_fmt, &mf);
 	if (ret < 0)
 		return ret;
 
@@ -1682,7 +1682,7 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 			 */
 			mf.width = 2560;
 			mf.height = 1920;
-			ret = v4l2_device_call_until_err(sd->v4l2_dev, 0, video,
+			ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
 							 try_mbus_fmt, &mf);
 			if (ret < 0) {
 				/* Shouldn't actually happen... */
diff --git a/drivers/media/video/sh_mobile_csi2.c b/drivers/media/video/sh_mobile_csi2.c
index dd1b81b1442b..98b87481fa94 100644
--- a/drivers/media/video/sh_mobile_csi2.c
+++ b/drivers/media/video/sh_mobile_csi2.c
@@ -38,6 +38,8 @@ struct sh_csi2 {
 	void __iomem			*base;
 	struct platform_device		*pdev;
 	struct sh_csi2_client_config	*client;
+	unsigned long (*query_bus_param)(struct soc_camera_device *);
+	int (*set_bus_param)(struct soc_camera_device *, unsigned long);
 };
 
 static int sh_csi2_try_fmt(struct v4l2_subdev *sd,
@@ -208,6 +210,7 @@ static int sh_csi2_notify(struct notifier_block *nb,
 	case BUS_NOTIFY_BOUND_DRIVER:
 		snprintf(priv->subdev.name, V4L2_SUBDEV_NAME_SIZE, "%s%s",
 			 dev_name(v4l2_dev->dev), ".mipi-csi");
+		priv->subdev.grp_id = (long)icd;
 		ret = v4l2_device_register_subdev(v4l2_dev, &priv->subdev);
 		dev_dbg(dev, "%s(%p): ret(register_subdev) = %d\n", __func__, priv, ret);
 		if (ret < 0)
@@ -215,6 +218,8 @@ static int sh_csi2_notify(struct notifier_block *nb,
 
 		priv->client = pdata->clients + i;
 
+		priv->set_bus_param		= icd->ops->set_bus_param;
+		priv->query_bus_param		= icd->ops->query_bus_param;
 		icd->ops->set_bus_param		= sh_csi2_set_bus_param;
 		icd->ops->query_bus_param	= sh_csi2_query_bus_param;
 
@@ -226,8 +231,10 @@ static int sh_csi2_notify(struct notifier_block *nb,
 		priv->client = NULL;
 
 		/* Driver is about to be unbound */
-		icd->ops->set_bus_param		= NULL;
-		icd->ops->query_bus_param	= NULL;
+		icd->ops->set_bus_param		= priv->set_bus_param;
+		icd->ops->query_bus_param	= priv->query_bus_param;
+		priv->set_bus_param		= NULL;
+		priv->query_bus_param		= NULL;
 
 		v4l2_device_unregister_subdev(&priv->subdev);
 
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 46284489e4eb..ddb4c091dedc 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -136,11 +136,50 @@ unsigned long soc_camera_apply_sensor_flags(struct soc_camera_link *icl,
 }
 EXPORT_SYMBOL(soc_camera_apply_sensor_flags);
 
+#define pixfmtstr(x) (x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, \
+	((x) >> 24) & 0xff
+
+static int soc_camera_try_fmt(struct soc_camera_device *icd,
+			      struct v4l2_format *f)
+{
+	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
+	struct v4l2_pix_format *pix = &f->fmt.pix;
+	int ret;
+
+	dev_dbg(&icd->dev, "TRY_FMT(%c%c%c%c, %ux%u)\n",
+		pixfmtstr(pix->pixelformat), pix->width, pix->height);
+
+	pix->bytesperline = 0;
+	pix->sizeimage = 0;
+
+	ret = ici->ops->try_fmt(icd, f);
+	if (ret < 0)
+		return ret;
+
+	if (!pix->sizeimage) {
+		if (!pix->bytesperline) {
+			const struct soc_camera_format_xlate *xlate;
+
+			xlate = soc_camera_xlate_by_fourcc(icd, pix->pixelformat);
+			if (!xlate)
+				return -EINVAL;
+
+			ret = soc_mbus_bytes_per_line(pix->width,
+						      xlate->host_fmt);
+			if (ret > 0)
+				pix->bytesperline = ret;
+		}
+		if (pix->bytesperline)
+			pix->sizeimage = pix->bytesperline * pix->height;
+	}
+
+	return 0;
+}
+
 static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 				      struct v4l2_format *f)
 {
 	struct soc_camera_device *icd = file->private_data;
-	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 
 	WARN_ON(priv != file->private_data);
 
@@ -149,7 +188,7 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 		return -EINVAL;
 
 	/* limit format to hardware capabilities */
-	return ici->ops->try_fmt(icd, f);
+	return soc_camera_try_fmt(icd, f);
 }
 
 static int soc_camera_enum_input(struct file *file, void *priv,
@@ -362,9 +401,6 @@ static void soc_camera_free_user_formats(struct soc_camera_device *icd)
 	icd->user_formats = NULL;
 }
 
-#define pixfmtstr(x) (x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, \
-	((x) >> 24) & 0xff
-
 /* Called with .vb_lock held, or from the first open(2), see comment there */
 static int soc_camera_set_fmt(struct soc_camera_device *icd,
 			      struct v4l2_format *f)
@@ -377,7 +413,7 @@ static int soc_camera_set_fmt(struct soc_camera_device *icd,
 		pixfmtstr(pix->pixelformat), pix->width, pix->height);
 
 	/* We always call try_fmt() before set_fmt() or set_crop() */
-	ret = ici->ops->try_fmt(icd, f);
+	ret = soc_camera_try_fmt(icd, f);
 	if (ret < 0)
 		return ret;
 
@@ -996,10 +1032,11 @@ static void soc_camera_free_i2c(struct soc_camera_device *icd)
 {
 	struct i2c_client *client =
 		to_i2c_client(to_soc_camera_control(icd));
+	struct i2c_adapter *adap = client->adapter;
 	dev_set_drvdata(&icd->dev, NULL);
 	v4l2_device_unregister_subdev(i2c_get_clientdata(client));
 	i2c_unregister_device(client);
-	i2c_put_adapter(client->adapter);
+	i2c_put_adapter(adap);
 }
 #else
 #define soc_camera_init_i2c(icd, icl)	(-ENODEV)
@@ -1071,6 +1108,9 @@ static int soc_camera_probe(struct device *dev)
 		}
 	}
 
+	sd = soc_camera_to_subdev(icd);
+	sd->grp_id = (long)icd;
+
 	/* At this point client .probe() should have run already */
 	ret = soc_camera_init_user_formats(icd);
 	if (ret < 0)
@@ -1092,7 +1132,6 @@ static int soc_camera_probe(struct device *dev)
 		goto evidstart;
 
 	/* Try to improve our guess of a reasonable window format */
-	sd = soc_camera_to_subdev(icd);
 	if (!v4l2_subdev_call(sd, video, g_mbus_fmt, &mf)) {
 		icd->user_width		= mf.width;
 		icd->user_height	= mf.height;
diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c
index 5d4cf3b3d435..22fa8202d5ca 100644
--- a/drivers/media/video/tda9840.c
+++ b/drivers/media/video/tda9840.c
@@ -171,7 +171,7 @@ static int tda9840_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
+	sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
 	if (sd == NULL)
 		return -ENOMEM;
 	v4l2_i2c_subdev_init(sd, client, &tda9840_ops);
diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c
index 19621ed523ec..827425c5b866 100644
--- a/drivers/media/video/tea6415c.c
+++ b/drivers/media/video/tea6415c.c
@@ -152,7 +152,7 @@ static int tea6415c_probe(struct i2c_client *client,
 
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
-	sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
+	sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
 	if (sd == NULL)
 		return -ENOMEM;
 	v4l2_i2c_subdev_init(sd, client, &tea6415c_ops);
diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c
index 5ea840401f21..f350b6c24500 100644
--- a/drivers/media/video/tea6420.c
+++ b/drivers/media/video/tea6420.c
@@ -125,7 +125,7 @@ static int tea6420_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	sd = kmalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
+	sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
 	if (sd == NULL)
 		return -ENOMEM;
 	v4l2_i2c_subdev_init(sd, client, &tea6420_ops);
diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c
index f8138c75be8b..1aab96a88203 100644
--- a/drivers/media/video/upd64031a.c
+++ b/drivers/media/video/upd64031a.c
@@ -230,7 +230,7 @@ static int upd64031a_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	state = kmalloc(sizeof(struct upd64031a_state), GFP_KERNEL);
+	state = kzalloc(sizeof(struct upd64031a_state), GFP_KERNEL);
 	if (state == NULL)
 		return -ENOMEM;
 	sd = &state->sd;
diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c
index 28e0e6b6ca84..9bbe61700fd5 100644
--- a/drivers/media/video/upd64083.c
+++ b/drivers/media/video/upd64083.c
@@ -202,7 +202,7 @@ static int upd64083_probe(struct i2c_client *client,
 	v4l_info(client, "chip found @ 0x%x (%s)\n",
 			client->addr << 1, client->adapter->name);
 
-	state = kmalloc(sizeof(struct upd64083_state), GFP_KERNEL);
+	state = kzalloc(sizeof(struct upd64083_state), GFP_KERNEL);
 	if (state == NULL)
 		return -ENOMEM;
 	sd = &state->sd;
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 498e6742579e..6dc7196296b3 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -389,7 +389,8 @@ static int v4l2_open(struct inode *inode, struct file *filp)
 	video_get(vdev);
 	mutex_unlock(&videodev_lock);
 #if defined(CONFIG_MEDIA_CONTROLLER)
-	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev) {
+	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev &&
+	    vdev->vfl_type != VFL_TYPE_SUBDEV) {
 		entity = media_entity_get(&vdev->entity);
 		if (!entity) {
 			ret = -EBUSY;
@@ -415,7 +416,8 @@ err:
 	/* decrease the refcount in case of an error */
 	if (ret) {
 #if defined(CONFIG_MEDIA_CONTROLLER)
-		if (vdev->v4l2_dev && vdev->v4l2_dev->mdev)
+		if (vdev->v4l2_dev && vdev->v4l2_dev->mdev &&
+		    vdev->vfl_type != VFL_TYPE_SUBDEV)
 			media_entity_put(entity);
 #endif
 		video_put(vdev);
@@ -437,7 +439,8 @@ static int v4l2_release(struct inode *inode, struct file *filp)
 			mutex_unlock(vdev->lock);
 	}
 #if defined(CONFIG_MEDIA_CONTROLLER)
-	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev)
+	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev &&
+	    vdev->vfl_type != VFL_TYPE_SUBDEV)
 		media_entity_put(&vdev->entity);
 #endif
 	/* decrease the refcount unconditionally since the release()
@@ -686,7 +689,8 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
 
 #if defined(CONFIG_MEDIA_CONTROLLER)
 	/* Part 5: Register the entity. */
-	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev) {
+	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev &&
+	    vdev->vfl_type != VFL_TYPE_SUBDEV) {
 		vdev->entity.type = MEDIA_ENT_T_DEVNODE_V4L;
 		vdev->entity.name = vdev->name;
 		vdev->entity.v4l.major = VIDEO_MAJOR;
@@ -733,7 +737,8 @@ void video_unregister_device(struct video_device *vdev)
 		return;
 
 #if defined(CONFIG_MEDIA_CONTROLLER)
-	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev)
+	if (vdev->v4l2_dev && vdev->v4l2_dev->mdev &&
+	    vdev->vfl_type != VFL_TYPE_SUBDEV)
 		media_device_unregister_entity(&vdev->entity);
 #endif
 
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
index 5aeaf876ba9b..4aae501f02d0 100644
--- a/drivers/media/video/v4l2-device.c
+++ b/drivers/media/video/v4l2-device.c
@@ -155,8 +155,10 @@ int v4l2_device_register_subdev(struct v4l2_device *v4l2_dev,
 	sd->v4l2_dev = v4l2_dev;
 	if (sd->internal_ops && sd->internal_ops->registered) {
 		err = sd->internal_ops->registered(sd);
-		if (err)
+		if (err) {
+			module_put(sd->owner);
 			return err;
+		}
 	}
 
 	/* This just returns 0 if either of the two args is NULL */
@@ -164,6 +166,7 @@ int v4l2_device_register_subdev(struct v4l2_device *v4l2_dev,
 	if (err) {
 		if (sd->internal_ops && sd->internal_ops->unregistered)
 			sd->internal_ops->unregistered(sd);
+		module_put(sd->owner);
 		return err;
 	}
 
diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c
index 0b8064490676..812729ebf09e 100644
--- a/drivers/media/video/v4l2-subdev.c
+++ b/drivers/media/video/v4l2-subdev.c
@@ -155,25 +155,25 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 	switch (cmd) {
 	case VIDIOC_QUERYCTRL:
-		return v4l2_subdev_queryctrl(sd, arg);
+		return v4l2_queryctrl(sd->ctrl_handler, arg);
 
 	case VIDIOC_QUERYMENU:
-		return v4l2_subdev_querymenu(sd, arg);
+		return v4l2_querymenu(sd->ctrl_handler, arg);
 
 	case VIDIOC_G_CTRL:
-		return v4l2_subdev_g_ctrl(sd, arg);
+		return v4l2_g_ctrl(sd->ctrl_handler, arg);
 
 	case VIDIOC_S_CTRL:
-		return v4l2_subdev_s_ctrl(sd, arg);
+		return v4l2_s_ctrl(sd->ctrl_handler, arg);
 
 	case VIDIOC_G_EXT_CTRLS:
-		return v4l2_subdev_g_ext_ctrls(sd, arg);
+		return v4l2_g_ext_ctrls(sd->ctrl_handler, arg);
 
 	case VIDIOC_S_EXT_CTRLS:
-		return v4l2_subdev_s_ext_ctrls(sd, arg);
+		return v4l2_s_ext_ctrls(sd->ctrl_handler, arg);
 
 	case VIDIOC_TRY_EXT_CTRLS:
-		return v4l2_subdev_try_ext_ctrls(sd, arg);
+		return v4l2_try_ext_ctrls(sd->ctrl_handler, arg);
 
 	case VIDIOC_DQEVENT:
 		if (!(sd->flags & V4L2_SUBDEV_FL_HAS_EVENTS))
diff --git a/drivers/media/video/videobuf-dma-contig.c b/drivers/media/video/videobuf-dma-contig.c
index c4742fc15529..c9691115f2d2 100644
--- a/drivers/media/video/videobuf-dma-contig.c
+++ b/drivers/media/video/videobuf-dma-contig.c
@@ -300,7 +300,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	retval = remap_pfn_range(vma, vma->vm_start,
-				 PFN_DOWN(virt_to_phys(mem->vaddr)),
+				 mem->dma_handle >> PAGE_SHIFT,
 				 size, vma->vm_page_prot);
 	if (retval) {
 		dev_err(q->dev, "mmap: remap failed with error %d. ", retval);
diff --git a/drivers/media/video/videobuf2-core.c b/drivers/media/video/videobuf2-core.c
index 6698c77e0f64..6ba1461d51ef 100644
--- a/drivers/media/video/videobuf2-core.c
+++ b/drivers/media/video/videobuf2-core.c
@@ -37,6 +37,9 @@ module_param(debug, int, 0644);
 #define call_qop(q, op, args...)					\
 	(((q)->ops->op) ? ((q)->ops->op(args)) : 0)
 
+#define V4L2_BUFFER_STATE_FLAGS	(V4L2_BUF_FLAG_MAPPED | V4L2_BUF_FLAG_QUEUED | \
+				 V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_ERROR)
+
 /**
  * __vb2_buf_mem_alloc() - allocate video memory for the given buffer
  */
@@ -51,7 +54,7 @@ static int __vb2_buf_mem_alloc(struct vb2_buffer *vb,
 	for (plane = 0; plane < vb->num_planes; ++plane) {
 		mem_priv = call_memop(q, plane, alloc, q->alloc_ctx[plane],
 					plane_sizes[plane]);
-		if (!mem_priv)
+		if (IS_ERR_OR_NULL(mem_priv))
 			goto free;
 
 		/* Associate allocator private data with this plane */
@@ -284,7 +287,7 @@ static int __fill_v4l2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b)
 	struct vb2_queue *q = vb->vb2_queue;
 	int ret = 0;
 
-	/* Copy back data such as timestamp, input, etc. */
+	/* Copy back data such as timestamp, flags, input, etc. */
 	memcpy(b, &vb->v4l2_buf, offsetof(struct v4l2_buffer, m));
 	b->input = vb->v4l2_buf.input;
 	b->reserved = vb->v4l2_buf.reserved;
@@ -313,7 +316,10 @@ static int __fill_v4l2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b)
 			b->m.userptr = vb->v4l2_planes[0].m.userptr;
 	}
 
-	b->flags = 0;
+	/*
+	 * Clear any buffer state related flags.
+	 */
+	b->flags &= ~V4L2_BUFFER_STATE_FLAGS;
 
 	switch (vb->state) {
 	case VB2_BUF_STATE_QUEUED:
@@ -519,6 +525,7 @@ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
 	num_buffers = min_t(unsigned int, req->count, VIDEO_MAX_FRAME);
 	memset(plane_sizes, 0, sizeof(plane_sizes));
 	memset(q->alloc_ctx, 0, sizeof(q->alloc_ctx));
+	q->memory = req->memory;
 
 	/*
 	 * Ask the driver how many buffers and planes per buffer it requires.
@@ -560,8 +567,6 @@ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
 		ret = num_buffers;
 	}
 
-	q->memory = req->memory;
-
 	/*
 	 * Return the number of successfully allocated buffers
 	 * to the userspace.
@@ -715,6 +720,8 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b,
 
 	vb->v4l2_buf.field = b->field;
 	vb->v4l2_buf.timestamp = b->timestamp;
+	vb->v4l2_buf.input = b->input;
+	vb->v4l2_buf.flags = b->flags & ~V4L2_BUFFER_STATE_FLAGS;
 
 	return 0;
 }
diff --git a/drivers/media/video/videobuf2-dma-contig.c b/drivers/media/video/videobuf2-dma-contig.c
index 58205d596138..a790a5f8c06f 100644
--- a/drivers/media/video/videobuf2-dma-contig.c
+++ b/drivers/media/video/videobuf2-dma-contig.c
@@ -46,7 +46,7 @@ static void *vb2_dma_contig_alloc(void *alloc_ctx, unsigned long size)
 					GFP_KERNEL);
 	if (!buf->vaddr) {
 		dev_err(conf->dev, "dma_alloc_coherent of size %ld failed\n",
-			buf->size);
+			size);
 		kfree(buf);
 		return ERR_PTR(-ENOMEM);
 	}
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 1735c84ff757..fe902338539b 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2008 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.04.18"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.18"
+#define MPT_LINUX_VERSION_COMMON	"3.04.19"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.19"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 66f94125de4e..7596aecd5072 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -5012,7 +5012,6 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 			(ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)) {
 			VirtTarget *vtarget = NULL;
 			u8		id, channel;
-			u32	 log_info = le32_to_cpu(reply->IOCLogInfo);
 
 			id = sas_event_data->TargetID;
 			channel = sas_event_data->Bus;
@@ -5023,7 +5022,8 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 				    "LogInfo (0x%x) available for "
 				   "INTERNAL_DEVICE_RESET"
 				   "fw_id %d fw_channel %d\n", ioc->name,
-				   log_info, id, channel));
+				   le32_to_cpu(reply->IOCLogInfo),
+				   id, channel));
 				if (vtarget->raidVolume) {
 					devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 					"Skipping Raid Volume for inDMD\n",
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 0d9b82a44540..a1d4ee6671be 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1415,11 +1415,8 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p, done()=%p\n",
 		ioc->name, SCpnt, done));
 
-	if (ioc->taskmgmt_quiesce_io) {
-		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT "qcmd: SCpnt=%p timeout + 60HZ\n",
-			ioc->name, SCpnt));
+	if (ioc->taskmgmt_quiesce_io)
 		return SCSI_MLQUEUE_HOST_BUSY;
-	}
 
 	/*
 	 *  Put together a MPT SCSI request...
@@ -1773,7 +1770,6 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 	int		 scpnt_idx;
 	int		 retval;
 	VirtDevice	 *vdevice;
-	ulong	 	 sn = SCpnt->serial_number;
 	MPT_ADAPTER	*ioc;
 
 	/* If we can't locate our host adapter structure, return FAILED status.
@@ -1859,8 +1855,7 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 			 vdevice->vtarget->id, vdevice->lun,
 			 ctx2abort, mptscsih_get_tm_timeout(ioc));
 
-	if (SCPNT_TO_LOOKUP_IDX(ioc, SCpnt) == scpnt_idx &&
-	    SCpnt->serial_number == sn) {
+	if (SCPNT_TO_LOOKUP_IDX(ioc, SCpnt) == scpnt_idx) {
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		    "task abort: command still in active list! (sc=%p)\n",
 		    ioc->name, SCpnt));
@@ -1873,9 +1868,9 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 	}
 
  out:
-	printk(MYIOC_s_INFO_FMT "task abort: %s (rv=%04x) (sc=%p) (sn=%ld)\n",
+	printk(MYIOC_s_INFO_FMT "task abort: %s (rv=%04x) (sc=%p)\n",
 	    ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), retval,
-	    SCpnt, SCpnt->serial_number);
+	    SCpnt);
 
 	return retval;
 }
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 6d9568d2ec59..8f61ba6aac23 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -867,6 +867,10 @@ static int mptspi_write_spi_device_pg1(struct scsi_target *starget,
 	struct _x_config_parms cfg;
 	struct _CONFIG_PAGE_HEADER hdr;
 	int err = -EBUSY;
+	u32 nego_parms;
+	u32 period;
+	struct scsi_device *sdev;
+	int i;
 
 	/* don't allow updating nego parameters on RAID devices */
 	if (starget->channel == 0 &&
@@ -904,6 +908,29 @@ static int mptspi_write_spi_device_pg1(struct scsi_target *starget,
 	pg1->Header.PageNumber = hdr.PageNumber;
 	pg1->Header.PageType = hdr.PageType;
 
+	nego_parms = le32_to_cpu(pg1->RequestedParameters);
+	period = (nego_parms & MPI_SCSIDEVPAGE1_RP_MIN_SYNC_PERIOD_MASK) >>
+		MPI_SCSIDEVPAGE1_RP_SHIFT_MIN_SYNC_PERIOD;
+	if (period == 8) {
+		/* Turn on inline data padding for TAPE when running U320 */
+		for (i = 0 ; i < 16; i++) {
+			/* The lookup takes a reference; drop it on every path */
+			sdev = scsi_device_lookup_by_target(starget, i);
+			if (!sdev)
+				continue;
+			if (sdev->type == TYPE_TAPE) {
+				sdev_printk(KERN_DEBUG, sdev, MYIOC_s_FMT
+					    "IDP:ON\n", ioc->name);
+				nego_parms |= MPI_SCSIDEVPAGE1_RP_IDP;
+				pg1->RequestedParameters =
+				    cpu_to_le32(nego_parms);
+				scsi_device_put(sdev);
+				break;
+			}
+			scsi_device_put(sdev);
+		}
+	}
+
 	mptspi_print_write_nego(hd, starget, le32_to_cpu(pg1->RequestedParameters));
 
 	if (mpt_config(ioc, &cfg)) {
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 643ad52e3ca2..4796bbf0ae4e 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1000,7 +1000,6 @@ static struct i2o_block_device *i2o_block_device_alloc(void)
 	gd->major = I2O_MAJOR;
 	gd->queue = queue;
 	gd->fops = &i2o_block_fops;
-	gd->events = DISK_EVENT_MEDIA_CHANGE;
 	gd->private_data = dev;
 
 	dev->gd = gd;
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index f003957e8e1c..74fbe56321ff 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -361,7 +361,7 @@ static int i2o_scsi_reply(struct i2o_controller *c, u32 m,
 	 */
 	error = le32_to_cpu(msg->body[0]);
 
-	osm_debug("Completed %ld\n", cmd->serial_number);
+	osm_debug("Completed 0x%p\n", cmd);
 
 	cmd->result = error & 0xff;
 	/*
@@ -678,7 +678,7 @@ static int i2o_scsi_queuecommand_lck(struct scsi_cmnd *SCpnt,
 	/* Queue the message */
 	i2o_msg_post(c, msg);
 
-	osm_debug("Issued %ld\n", SCpnt->serial_number);
+	osm_debug("Issued 0x%p\n", SCpnt);
 
 	return 0;
 
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index d4a851c6b5bf..0b4d5b23bec9 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -144,7 +144,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 	int iter, i;
 	unsigned long flags;
 
-	data->chip->irq_ack(irq_data);
+	data->chip->irq_ack(data);
 
 	for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
 		u32 status;
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index d01574d98870..f4c8c844b913 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -55,6 +55,19 @@ int mfd_cell_disable(struct platform_device *pdev)
 }
 EXPORT_SYMBOL(mfd_cell_disable);
 
+static int mfd_platform_add_cell(struct platform_device *pdev,
+				 const struct mfd_cell *cell)
+{
+	if (!cell)
+		return 0;
+
+	pdev->mfd_cell = kmemdup(cell, sizeof(*cell), GFP_KERNEL);
+	if (!pdev->mfd_cell)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int mfd_add_device(struct device *parent, int id,
 			  const struct mfd_cell *cell,
 			  struct resource *mem_base,
@@ -75,7 +88,7 @@ static int mfd_add_device(struct device *parent, int id,
 
 	pdev->dev.parent = parent;
 
-	ret = platform_device_add_data(pdev, cell, sizeof(*cell));
+	ret = mfd_platform_add_cell(pdev, cell);
 	if (ret)
 		goto fail_res;
 
@@ -123,7 +136,6 @@ static int mfd_add_device(struct device *parent, int id,
 
 	return 0;
 
-/*	platform_device_del(pdev); */
 fail_res:
 	kfree(res);
 fail_device:
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 53450f433f10..3ab9ffa00aad 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
-#include <linux/regulator/consumer.h>
 #include <plat/usb.h>
 
 #define USBHS_DRIVER_NAME	"usbhs-omap"
@@ -700,8 +699,7 @@ static int usbhs_enable(struct device *dev)
 	dev_dbg(dev, "starting TI HSUSB Controller\n");
 	if (!pdata) {
 		dev_dbg(dev, "missing platform_data\n");
-		ret =  -ENODEV;
-		goto end_enable;
+		return  -ENODEV;
 	}
 
 	spin_lock_irqsave(&omap->lock, flags);
@@ -719,14 +717,14 @@ static int usbhs_enable(struct device *dev)
 			gpio_request(pdata->ehci_data->reset_gpio_port[0],
 						"USB1 PHY reset");
 			gpio_direction_output
-				(pdata->ehci_data->reset_gpio_port[0], 1);
+				(pdata->ehci_data->reset_gpio_port[0], 0);
 		}
 
 		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) {
 			gpio_request(pdata->ehci_data->reset_gpio_port[1],
 						"USB2 PHY reset");
 			gpio_direction_output
-				(pdata->ehci_data->reset_gpio_port[1], 1);
+				(pdata->ehci_data->reset_gpio_port[1], 0);
 		}
 
 		/* Hold the PHY in RESET for enough time till DIR is high */
@@ -906,16 +904,17 @@ static int usbhs_enable(struct device *dev)
 
 		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0]))
 			gpio_set_value
-				(pdata->ehci_data->reset_gpio_port[0], 0);
+				(pdata->ehci_data->reset_gpio_port[0], 1);
 
 		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1]))
 			gpio_set_value
-				(pdata->ehci_data->reset_gpio_port[1], 0);
+				(pdata->ehci_data->reset_gpio_port[1], 1);
 	}
 
 end_count:
 	omap->count++;
-	goto end_enable;
+	spin_unlock_irqrestore(&omap->lock, flags);
+	return 0;
 
 err_tll:
 	if (pdata->ehci_data->phy_reset) {
@@ -931,8 +930,6 @@ err_tll:
 	clk_disable(omap->usbhost_fs_fck);
 	clk_disable(omap->usbhost_hs_fck);
 	clk_disable(omap->usbhost_ick);
-
-end_enable:
 	spin_unlock_irqrestore(&omap->lock, flags);
 	return ret;
 }
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 16422de0823a..2c0d4d16491a 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -447,12 +447,13 @@ static int __init load_twl4030_script(struct twl4030_script *tscript,
 		if (err)
 			goto out;
 	}
-	if (tscript->flags & TWL4030_SLEEP_SCRIPT)
+	if (tscript->flags & TWL4030_SLEEP_SCRIPT) {
 		if (order)
 			pr_warning("TWL4030: Bad order of scripts (sleep "\
 					"script before wakeup) Leads to boot"\
 					"failure on some boards\n");
 		err = twl4030_config_sleep_sequence(address);
+	}
 out:
 	return err;
 }
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 4e007c6a4b44..d80dcdee88f3 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -481,5 +481,6 @@ source "drivers/misc/cb710/Kconfig"
 source "drivers/misc/iwmc3200top/Kconfig"
 source "drivers/misc/ti-st/Kconfig"
 source "drivers/misc/lis3lv02d/Kconfig"
+source "drivers/misc/carma/Kconfig"
 
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f5468602961f..848e8464faab 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -44,3 +44,4 @@ obj-$(CONFIG_PCH_PHUB)		+= pch_phub.o
 obj-y				+= ti-st/
 obj-$(CONFIG_AB8500_PWM)	+= ab8500-pwm.o
 obj-y				+= lis3lv02d/
+obj-y				+= carma/
diff --git a/drivers/misc/carma/Kconfig b/drivers/misc/carma/Kconfig
new file mode 100644
index 000000000000..c90370ed712b
--- /dev/null
+++ b/drivers/misc/carma/Kconfig
@@ -0,0 +1,17 @@
+config CARMA_FPGA
+	tristate "CARMA DATA-FPGA Access Driver"
+	depends on FSL_SOC && PPC_83xx && MEDIA_SUPPORT && HAS_DMA && FSL_DMA
+	select VIDEOBUF_DMA_SG
+	default n
+	help
+	  Say Y here to include support for communicating with the data
+	  processing FPGAs on the OVRO CARMA board.
+
+config CARMA_FPGA_PROGRAM
+	tristate "CARMA DATA-FPGA Programmer"
+	depends on FSL_SOC && PPC_83xx && MEDIA_SUPPORT && HAS_DMA && FSL_DMA
+	select VIDEOBUF_DMA_SG
+	default n
+	help
+	  Say Y here to include support for programming the data processing
+	  FPGAs on the OVRO CARMA board.
diff --git a/drivers/misc/carma/Makefile b/drivers/misc/carma/Makefile
new file mode 100644
index 000000000000..ff36ac2ce534
--- /dev/null
+++ b/drivers/misc/carma/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CARMA_FPGA)		+= carma-fpga.o
+obj-$(CONFIG_CARMA_FPGA_PROGRAM)	+= carma-fpga-program.o
diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
new file mode 100644
index 000000000000..7ce6065dc20e
--- /dev/null
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -0,0 +1,1141 @@
+/*
+ * CARMA Board DATA-FPGA Programmer
+ *
+ * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/of_platform.h>
+#include <linux/completion.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/slab.h>
+#include <linux/kref.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+
+#include <media/videobuf-dma-sg.h>
+
+/* MPC8349EMDS specific get_immrbase() */
+#include <sysdev/fsl_soc.h>
+
+static const char drv_name[] = "carma-fpga-program";
+
+/*
+ * Firmware images are always this exact size
+ *
+ * 12849552 bytes for a CARMA Digitizer Board (EP2S90 FPGAs)
+ * 18662880 bytes for a CARMA Correlator Board (EP2S130 FPGAs)
+ */
+#define FW_SIZE_EP2S90		12849552
+#define FW_SIZE_EP2S130		18662880
+
+struct fpga_dev {
+	struct miscdevice miscdev;
+
+	/* Reference count: one for the driver, one per open file */
+	struct kref ref;
+
+	/* Device Registers */
+	struct device *dev;
+	void __iomem *regs;
+	void __iomem *immr;
+
+	/* Freescale DMA Device */
+	struct dma_chan *chan;
+
+	/* Interrupts: status is 0 or -EIO, as saved by the interrupt handler */
+	int irq, status;
+	struct completion completion;
+
+	/* FPGA Bitfile: lock is held for as long as the device file is open */
+	struct mutex lock;
+
+	struct videobuf_dmabuf vb;
+	bool vb_allocated;
+
+	/* expected bitfile size and number of bytes written so far */
+	size_t fw_size;
+	size_t bytes;
+};
+
+/*
+ * FPGA Bitfile Helpers
+ */
+
+/**
+ * fpga_drop_firmware_data() - drop the bitfile image from memory
+ * @priv: the driver's private data structure
+ *
+ * LOCKING: must hold priv->lock
+ */
+static void fpga_drop_firmware_data(struct fpga_dev *priv)
+{
+	videobuf_dma_free(&priv->vb);
+	priv->vb_allocated = false;
+	priv->bytes = 0;
+}
+
+/*
+ * Private Data Reference Count
+ */
+
+static void fpga_dev_remove(struct kref *ref)
+{
+	struct fpga_dev *priv = container_of(ref, struct fpga_dev, ref);
+
+	/* free any firmware image that was not programmed */
+	fpga_drop_firmware_data(priv);
+
+	mutex_destroy(&priv->lock);
+	kfree(priv);
+}
+
+/*
+ * LED Trigger (could be a separate module)
+ */
+
+/*
+ * NOTE: this whole thing does have the problem that whenever the led's are
+ * NOTE: first set to use the fpga trigger, they could be in the wrong state
+ */
+
+DEFINE_LED_TRIGGER(ledtrig_fpga);
+
+static void ledtrig_fpga_programmed(bool enabled)
+{
+	/*
+	 * LED fully on while the FPGAs are programmed, off otherwise
+	 */
+	led_trigger_event(ledtrig_fpga, enabled ? LED_FULL : LED_OFF);
+}
+
+/*
+ * FPGA Register Helpers
+ */
+
+/* Register Definitions */
+#define FPGA_CONFIG_CONTROL		0x40
+#define FPGA_CONFIG_STATUS		0x44
+#define FPGA_CONFIG_FIFO_SIZE		0x48
+#define FPGA_CONFIG_FIFO_USED		0x4C
+#define FPGA_CONFIG_TOTAL_BYTE_COUNT	0x50
+#define FPGA_CONFIG_CUR_BYTE_COUNT	0x54
+
+#define FPGA_FIFO_ADDRESS		0x3000
+
+static int fpga_fifo_size(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_FIFO_SIZE);
+}
+
+#define CFG_STATUS_ERR_MASK	0xfffe
+
+static int fpga_config_error(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_STATUS) & CFG_STATUS_ERR_MASK;
+}
+
+static int fpga_fifo_empty(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_FIFO_USED) == 0;
+}
+
+static void fpga_fifo_write(void __iomem *regs, u32 val)
+{
+	iowrite32be(val, regs + FPGA_FIFO_ADDRESS);
+}
+
+static void fpga_set_byte_count(void __iomem *regs, u32 count)
+{
+	iowrite32be(count, regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
+}
+
+#define CFG_CTL_ENABLE	(1 << 0)
+#define CFG_CTL_RESET	(1 << 1)
+#define CFG_CTL_DMA	(1 << 2)
+
+static void fpga_programmer_enable(struct fpga_dev *priv, bool dma)
+{
+	u32 val;
+
+	val = (dma) ? (CFG_CTL_ENABLE | CFG_CTL_DMA) : CFG_CTL_ENABLE;
+	iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL);
+}
+
+static void fpga_programmer_disable(struct fpga_dev *priv)
+{
+	iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL);
+}
+
+static void fpga_dump_registers(struct fpga_dev *priv)
+{
+	u32 control, status, size, used, total, curr;
+
+	/* good status: do nothing */
+	if (priv->status == 0)
+		return;
+
+	/* Dump all status registers */
+	control = ioread32be(priv->regs + FPGA_CONFIG_CONTROL);
+	status = ioread32be(priv->regs + FPGA_CONFIG_STATUS);
+	size = ioread32be(priv->regs + FPGA_CONFIG_FIFO_SIZE);
+	used = ioread32be(priv->regs + FPGA_CONFIG_FIFO_USED);
+	total = ioread32be(priv->regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
+	curr = ioread32be(priv->regs + FPGA_CONFIG_CUR_BYTE_COUNT);
+
+	dev_err(priv->dev, "Configuration failed, dumping status registers\n");
+	dev_err(priv->dev, "Control:    0x%.8x\n", control);
+	dev_err(priv->dev, "Status:     0x%.8x\n", status);
+	dev_err(priv->dev, "FIFO Size:  0x%.8x\n", size);
+	dev_err(priv->dev, "FIFO Used:  0x%.8x\n", used);
+	dev_err(priv->dev, "FIFO Total: 0x%.8x\n", total);
+	dev_err(priv->dev, "FIFO Curr:  0x%.8x\n", curr);
+}
+
+/*
+ * FPGA Power Supply Code
+ */
+
+#define CTL_PWR_CONTROL		0x2006
+#define CTL_PWR_STATUS		0x200A
+#define CTL_PWR_FAIL		0x200B
+
+#define PWR_CONTROL_ENABLE	0x01
+
+#define PWR_STATUS_ERROR_MASK	0x10
+#define PWR_STATUS_GOOD		0x0f
+
+/*
+ * Determine if the FPGA power is good for all supplies
+ */
+static bool fpga_power_good(struct fpga_dev *priv)
+{
+	u8 status = ioread8(priv->regs + CTL_PWR_STATUS);
+
+	/* a set alarm bit means bad power, whatever the power goods say */
+	if (status & PWR_STATUS_ERROR_MASK)
+		return false;
+
+	return status == PWR_STATUS_GOOD;
+}
+
+/*
+ * Disable the FPGA power supplies
+ */
+static void fpga_disable_power_supplies(struct fpga_dev *priv)
+{
+	unsigned long start;
+	u8 val;
+
+	iowrite8(0x0, priv->regs + CTL_PWR_CONTROL);
+
+	/*
+	 * Wait 500ms for the power rails to discharge
+	 *
+	 * Without this delay, the CTL-CPLD state machine can get into a
+	 * state where it is waiting for the power-goods to assert, but they
+	 * never do. This only happens when enabling and disabling the
+	 * power sequencer very rapidly.
+	 *
+	 * The loop below will also wait for the power goods to de-assert,
+	 * but testing has shown that they are always disabled by the time
+	 * the sleep completes. However, omitting the sleep and only waiting
+	 * for the power-goods to de-assert was not sufficient to ensure
+	 * that the power sequencer would not wedge itself.
+	 */
+	msleep(500);
+
+	start = jiffies;
+	while (time_before(jiffies, start + HZ)) {
+		val = ioread8(priv->regs + CTL_PWR_STATUS);
+		if (!(val & PWR_STATUS_GOOD))
+			break;
+
+		usleep_range(5000, 10000);
+	}
+
+	val = ioread8(priv->regs + CTL_PWR_STATUS);
+	if (val & PWR_STATUS_GOOD) {
+		dev_err(priv->dev, "power disable failed: "
+				   "power goods: status 0x%.2x\n", val);
+	}
+
+	if (val & PWR_STATUS_ERROR_MASK) {
+		dev_err(priv->dev, "power disable failed: "
+				   "alarm bit set: status 0x%.2x\n", val);
+	}
+}
+
+/**
+ * fpga_enable_power_supplies() - enable the DATA-FPGA power supplies
+ * @priv: the driver's private data structure
+ *
+ * Enable the DATA-FPGA power supplies, waiting up to 1 second for
+ * them to enable successfully.
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int fpga_enable_power_supplies(struct fpga_dev *priv)
+{
+	unsigned long start = jiffies;
+
+	if (fpga_power_good(priv)) {
+		dev_dbg(priv->dev, "power was already good\n");
+		return 0;
+	}
+
+	iowrite8(PWR_CONTROL_ENABLE, priv->regs + CTL_PWR_CONTROL);
+	while (time_before(jiffies, start + HZ)) {
+		if (fpga_power_good(priv))
+			return 0;
+
+		usleep_range(5000, 10000);
+	}
+
+	return fpga_power_good(priv) ? 0 : -ETIMEDOUT;
+}
+
+/*
+ * Determine if the FPGA power supplies are all enabled
+ */
+static bool fpga_power_enabled(struct fpga_dev *priv)
+{
+	/*
+	 * The supplies count as enabled whenever the enable bit is set
+	 * in the CTL_PWR_CONTROL register.
+	 */
+	u8 ctl = ioread8(priv->regs + CTL_PWR_CONTROL);
+
+	return (ctl & PWR_CONTROL_ENABLE) != 0;
+}
+
+/*
+ * Determine if the FPGA's are programmed and running correctly
+ */
+static bool fpga_running(struct fpga_dev *priv)
+{
+	if (!fpga_power_good(priv))
+		return false;
+
+	/* Check the config done bit */
+	return ioread32be(priv->regs + FPGA_CONFIG_STATUS) & (1 << 18);
+}
+
+/*
+ * FPGA Programming Code
+ */
+
+/**
+ * fpga_program_block() - put a block of data into the programmer's FIFO
+ * @priv: the driver's private data structure
+ * @buf: the data to program
+ * @count: the length of data to program (must be a multiple of 4 bytes)
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int fpga_program_block(struct fpga_dev *priv, void *buf, size_t count)
+{
+	u32 *data = buf;
+	int size = fpga_fifo_size(priv->regs);
+	int i, len;
+	unsigned long timeout;
+
+	/* enforce correct data length for the FIFO */
+	BUG_ON(count % 4 != 0);
+
+	while (count > 0) {
+
+		/* Get the size of the block to write (maximum is FIFO_SIZE) */
+		len = min_t(size_t, count, size);
+		timeout = jiffies + HZ / 4;
+
+		/* Write the block */
+		for (i = 0; i < len / 4; i++)
+			fpga_fifo_write(priv->regs, data[i]);
+
+		/* Update the amounts left */
+		count -= len;
+		data += len / 4;
+
+		/* Wait for the fifo to empty */
+		while (true) {
+
+			if (fpga_fifo_empty(priv->regs)) {
+				break;
+			} else {
+				dev_dbg(priv->dev, "Fifo not empty\n");
+				cpu_relax();
+			}
+
+			if (fpga_config_error(priv->regs)) {
+				dev_err(priv->dev, "Error detected\n");
+				return -EIO;
+			}
+
+			if (time_after(jiffies, timeout)) {
+				dev_err(priv->dev, "Fifo drain timeout\n");
+				return -ETIMEDOUT;
+			}
+
+			usleep_range(5000, 10000);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * fpga_program_cpu() - program the DATA-FPGA's using the CPU
+ * @priv: the driver's private data structure
+ *
+ * This is useful when the DMA programming method fails. It is possible to
+ * wedge the Freescale DMA controller such that the DMA programming method
+ * always fails. This method has always succeeded.
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static noinline int fpga_program_cpu(struct fpga_dev *priv)
+{
+	int ret;
+
+	/* Disable the programmer */
+	fpga_programmer_disable(priv);
+
+	/* Set the total byte count (priv->bytes is a size_t, hence %zu) */
+	fpga_set_byte_count(priv->regs, priv->bytes);
+	dev_dbg(priv->dev, "total byte count %zu bytes\n", priv->bytes);
+
+	/* Enable the controller for programming */
+	fpga_programmer_enable(priv, false);
+	dev_dbg(priv->dev, "enabled the controller\n");
+
+	/* Write each chunk of the FPGA bitfile to FPGA programmer */
+	ret = fpga_program_block(priv, priv->vb.vaddr, priv->bytes);
+	if (ret)
+		goto out_disable_controller;
+
+	/* Wait for the interrupt handler to signal that programming finished */
+	ret = wait_for_completion_timeout(&priv->completion, 2 * HZ);
+	if (!ret) {
+		dev_err(priv->dev, "Timed out waiting for completion\n");
+		ret = -ETIMEDOUT;
+		goto out_disable_controller;
+	}
+
+	/* Retrieve the status from the interrupt handler */
+	ret = priv->status;
+
+out_disable_controller:
+	fpga_programmer_disable(priv);
+	return ret;
+}
+
+#define FIFO_DMA_ADDRESS	0xf0003000
+#define FIFO_MAX_LEN		4096
+
+/**
+ * fpga_program_dma() - program the DATA-FPGA's using the DMA engine
+ * @priv: the driver's private data structure
+ *
+ * Program the DATA-FPGA's using the Freescale DMA engine. This requires that
+ * the engine is programmed such that the hardware DMA request lines can
+ * control the entire DMA transaction. The system controller FPGA then
+ * completely offloads the programming from the CPU.
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static noinline int fpga_program_dma(struct fpga_dev *priv)
+{
+	struct videobuf_dmabuf *vb = &priv->vb;
+	struct dma_chan *chan = priv->chan;
+	struct dma_async_tx_descriptor *tx;
+	size_t num_pages, len, avail = 0;
+	struct dma_slave_config config;
+	struct scatterlist *sg;
+	struct sg_table table;
+	dma_cookie_t cookie;
+	int ret, i;
+
+	/* Disable the programmer */
+	fpga_programmer_disable(priv);
+
+	/* Allocate a scatterlist for the DMA destination */
+	num_pages = DIV_ROUND_UP(priv->bytes, FIFO_MAX_LEN);
+	ret = sg_alloc_table(&table, num_pages, GFP_KERNEL);
+	if (ret) {
+		dev_err(priv->dev, "Unable to allocate dst scatterlist\n");
+		ret = -ENOMEM;
+		goto out_return;
+	}
+
+	/*
+	 * This is an ugly hack
+	 *
+	 * We fill in a scatterlist as if it were mapped for DMA. This is
+	 * necessary because there exists no better structure for this
+	 * inside the kernel code.
+	 *
+	 * As an added bonus, we can use the DMAEngine API for all of this,
+	 * rather than inventing another extremely similar API.
+	 */
+	avail = priv->bytes;
+	for_each_sg(table.sgl, sg, num_pages, i) {
+		len = min_t(size_t, avail, FIFO_MAX_LEN);
+		sg_dma_address(sg) = FIFO_DMA_ADDRESS;
+		sg_dma_len(sg) = len;
+
+		avail -= len;
+	}
+
+	/* Map the buffer for DMA */
+	ret = videobuf_dma_map(priv->dev, &priv->vb);
+	if (ret) {
+		dev_err(priv->dev, "Unable to map buffer for DMA\n");
+		goto out_free_table;
+	}
+
+	/*
+	 * Configure the DMA channel to transfer FIFO_SIZE / 2 bytes per
+	 * transaction, and then put it under external control
+	 */
+	memset(&config, 0, sizeof(config));
+	config.direction = DMA_TO_DEVICE;
+	config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	config.dst_maxburst = fpga_fifo_size(priv->regs) / 2 / 4;
+	ret = chan->device->device_control(chan, DMA_SLAVE_CONFIG,
+					   (unsigned long)&config);
+	if (ret) {
+		dev_err(priv->dev, "DMA slave configuration failed\n");
+		goto out_dma_unmap;
+	}
+
+	ret = chan->device->device_control(chan, FSLDMA_EXTERNAL_START, 1);
+	if (ret) {
+		dev_err(priv->dev, "DMA external control setup failed\n");
+		goto out_dma_unmap;
+	}
+
+	/* setup and submit the DMA transaction */
+	tx = chan->device->device_prep_dma_sg(chan,
+					      table.sgl, num_pages,
+					      vb->sglist, vb->sglen, 0);
+	if (!tx) {
+		dev_err(priv->dev, "Unable to prep DMA transaction\n");
+		ret = -ENOMEM;
+		goto out_dma_unmap;
+	}
+
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(priv->dev, "Unable to submit DMA transaction\n");
+		ret = -ENOMEM;
+		goto out_dma_unmap;
+	}
+
+	dma_async_memcpy_issue_pending(chan);
+
+	/* Set the total byte count (priv->bytes is a size_t, hence %zu) */
+	fpga_set_byte_count(priv->regs, priv->bytes);
+	dev_dbg(priv->dev, "total byte count %zu bytes\n", priv->bytes);
+
+	/* Enable the controller for DMA programming */
+	fpga_programmer_enable(priv, true);
+	dev_dbg(priv->dev, "enabled the controller\n");
+
+	/* Wait for the interrupt handler to signal that programming finished */
+	ret = wait_for_completion_timeout(&priv->completion, 2 * HZ);
+	if (!ret) {
+		dev_err(priv->dev, "Timed out waiting for completion\n");
+		ret = -ETIMEDOUT;
+		goto out_disable_controller;
+	}
+
+	/* Retrieve the status from the interrupt handler */
+	ret = priv->status;
+
+out_disable_controller:
+	fpga_programmer_disable(priv);
+out_dma_unmap:
+	videobuf_dma_unmap(priv->dev, vb);
+out_free_table:
+	sg_free_table(&table);
+out_return:
+	return ret;
+}
+
+/*
+ * Interrupt Handling
+ */
+
+static irqreturn_t fpga_irq(int irq, void *dev_id)
+{
+	struct fpga_dev *priv = dev_id;
+
+	/* Save the status */
+	priv->status = fpga_config_error(priv->regs) ? -EIO : 0;
+	dev_dbg(priv->dev, "INTERRUPT status %d\n", priv->status);
+	fpga_dump_registers(priv);
+
+	/* Disabling the programmer clears the interrupt */
+	fpga_programmer_disable(priv);
+
+	/* Notify any waiters */
+	complete(&priv->completion);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * SYSFS Helpers
+ */
+
+/**
+ * fpga_do_stop() - deconfigure (reset) the DATA-FPGA's
+ * @priv: the driver's private data structure
+ *
+ * LOCKING: must hold priv->lock
+ */
+static int fpga_do_stop(struct fpga_dev *priv)
+{
+	u32 val;
+
+	/* Set the led to unprogrammed */
+	ledtrig_fpga_programmed(false);
+
+	/* Pulse the config line to reset the FPGA's */
+	val = CFG_CTL_ENABLE | CFG_CTL_RESET;
+	iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL);
+	iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL);
+
+	return 0;
+}
+
+static noinline int fpga_do_program(struct fpga_dev *priv)
+{
+	int ret;
+
+	if (priv->bytes != priv->fw_size) {
+		dev_err(priv->dev, "Incorrect bitfile size: got %zu bytes, "
+				   "should be %zu bytes\n",
+				   priv->bytes, priv->fw_size);
+		return -EINVAL;
+	}
+
+	if (!fpga_power_enabled(priv)) {
+		dev_err(priv->dev, "Power not enabled\n");
+		return -EINVAL;
+	}
+
+	if (!fpga_power_good(priv)) {
+		dev_err(priv->dev, "Power not good\n");
+		return -EINVAL;
+	}
+
+	/* Set the LED to unprogrammed */
+	ledtrig_fpga_programmed(false);
+
+	/* Try to program the FPGA's using DMA */
+	ret = fpga_program_dma(priv);
+
+	/* If DMA failed or doesn't exist, try with CPU */
+	if (ret) {
+		dev_warn(priv->dev, "Falling back to CPU programming\n");
+		ret = fpga_program_cpu(priv);
+	}
+
+	if (ret) {
+		dev_err(priv->dev, "Unable to program FPGA's\n");
+		return ret;
+	}
+
+	/* Drop the firmware bitfile from memory */
+	fpga_drop_firmware_data(priv);
+
+	dev_dbg(priv->dev, "FPGA programming successful\n");
+	ledtrig_fpga_programmed(true);
+
+	return 0;
+}
+
+/*
+ * File Operations
+ */
+
+static int fpga_open(struct inode *inode, struct file *filp)
+{
+	/*
+	 * The miscdevice layer puts our struct miscdevice into the
+	 * filp->private_data field. We use this to find our private
+	 * data and then overwrite it with our own private structure.
+	 */
+	struct fpga_dev *priv = container_of(filp->private_data,
+					     struct fpga_dev, miscdev);
+	unsigned int nr_pages;
+	int ret;
+
+	/* We only allow one process at a time */
+	ret = mutex_lock_interruptible(&priv->lock);
+	if (ret)
+		return ret;
+
+	filp->private_data = priv;
+	kref_get(&priv->ref);
+
+	/* Truncation: drop any existing data */
+	if (filp->f_flags & O_TRUNC)
+		priv->bytes = 0;
+
+	/* Check if we have already allocated a buffer */
+	if (priv->vb_allocated)
+		return 0;
+
+	/* Allocate a buffer to hold enough data for the bitfile */
+	nr_pages = DIV_ROUND_UP(priv->fw_size, PAGE_SIZE);
+	ret = videobuf_dma_init_kernel(&priv->vb, DMA_TO_DEVICE, nr_pages);
+	if (ret) {
+		dev_err(priv->dev, "unable to allocate data buffer\n");
+		mutex_unlock(&priv->lock);
+		kref_put(&priv->ref, fpga_dev_remove);
+		return ret;
+	}
+
+	priv->vb_allocated = true;
+	return 0;
+}
+
+static int fpga_release(struct inode *inode, struct file *filp)
+{
+	struct fpga_dev *priv = filp->private_data;
+
+	mutex_unlock(&priv->lock);	/* taken in fpga_open() */
+	kref_put(&priv->ref, fpga_dev_remove);
+	return 0;
+}
+
+static ssize_t fpga_write(struct file *filp, const char __user *buf,
+			  size_t count, loff_t *f_pos)
+{
+	struct fpga_dev *priv = filp->private_data;
+	size_t room = priv->fw_size - priv->bytes;
+
+	/* a bitfile is exactly fw_size bytes: refuse any data beyond that */
+	if (priv->bytes >= priv->fw_size)
+		return -ENOSPC;
+
+	count = (count < room) ? count : room;
+	if (copy_from_user(priv->vb.vaddr + priv->bytes, buf, count))
+		return -EFAULT;
+	priv->bytes += count;
+	return count;
+}
+
+static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count,
+			 loff_t *f_pos)
+{
+	struct fpga_dev *priv = filp->private_data;
+	size_t avail = (*f_pos < priv->bytes) ? priv->bytes - *f_pos : 0;
+
+	count = min_t(size_t, avail, count);	/* 0 at or past end of data */
+	if (copy_to_user(buf, priv->vb.vaddr + *f_pos, count))
+		return -EFAULT;
+	*f_pos += count;
+	return count;
+}
+
+static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin)
+{
+	struct fpga_dev *priv = filp->private_data;
+	loff_t newpos;
+
+	/* only read-only opens are allowed to seek */
+	if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+		return -EINVAL;
+
+	switch (origin) {
+	case SEEK_SET: /* seek relative to the beginning of the file */
+		newpos = offset;
+		break;
+	case SEEK_CUR: /* seek relative to current position in the file */
+		newpos = filp->f_pos + offset;
+		break;
+	case SEEK_END: /* seek relative to the end of the file */
+		newpos = priv->fw_size + offset;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* the resulting position must stay within [0, fw_size] */
+	if (newpos < 0 || newpos > priv->fw_size)
+		return -EINVAL;
+
+	filp->f_pos = newpos;
+	return newpos;
+}
+
+static const struct file_operations fpga_fops = {
+	.open		= fpga_open,
+	.release	= fpga_release,
+	.write		= fpga_write,
+	.read		= fpga_read,
+	.llseek		= fpga_llseek,
+};
+
+/*
+ * Device Attributes
+ */
+
+static ssize_t pfail_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	u8 val;
+
+	val = ioread8(priv->regs + CTL_PWR_FAIL);
+	return snprintf(buf, PAGE_SIZE, "0x%.2x\n", val);
+}
+
+static ssize_t pgood_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_good(priv));
+}
+
+static ssize_t penable_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_enabled(priv));
+}
+
+static ssize_t penable_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	if (strict_strtoul(buf, 0, &val))
+		return -EINVAL;
+
+	if (val) {		/* nonzero: enable, zero: disable */
+		ret = fpga_enable_power_supplies(priv);
+		if (ret)
+			return ret;
+	} else {
+		fpga_do_stop(priv);	/* NOTE: priv->lock not held */
+		fpga_disable_power_supplies(priv);
+	}
+
+	return count;
+}
+
+static ssize_t program_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_running(priv));
+}
+
+static ssize_t program_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct fpga_dev *priv = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	if (strict_strtoul(buf, 0, &val))
+		return -EINVAL;
+
+	/* We can't have an image writer and be programming simultaneously */
+	if (mutex_lock_interruptible(&priv->lock))
+		return -ERESTARTSYS;
+
+	/* Program or Reset the FPGA's */
+	ret = val ? fpga_do_program(priv) : fpga_do_stop(priv);
+	if (ret)
+		goto out_unlock;
+
+	/* Success */
+	ret = count;
+
+out_unlock:
+	mutex_unlock(&priv->lock);
+	return ret;
+}
+
+static DEVICE_ATTR(power_fail, S_IRUGO, pfail_show, NULL);
+static DEVICE_ATTR(power_good, S_IRUGO, pgood_show, NULL);
+static DEVICE_ATTR(power_enable, S_IRUGO | S_IWUSR,
+		   penable_show, penable_store);
+
+static DEVICE_ATTR(program, S_IRUGO | S_IWUSR,
+		   program_show, program_store);
+
+static struct attribute *fpga_attributes[] = {
+	&dev_attr_power_fail.attr,
+	&dev_attr_power_good.attr,
+	&dev_attr_power_enable.attr,
+	&dev_attr_program.attr,
+	NULL,
+};
+
+static const struct attribute_group fpga_attr_group = {
+	.attrs = fpga_attributes,
+};
+
+/*
+ * OpenFirmware Device Subsystem
+ */
+
+#define SYS_REG_VERSION		0x00
+#define SYS_REG_GEOGRAPHIC	0x10
+
+static bool dma_filter(struct dma_chan *chan, void *data)
+{
+	/*
+	 * Accept only channel #0 of DMA device #0. This probably will not
+	 * survive a reload of the Freescale DMAEngine driver; not a problem.
+	 */
+	if (chan->chan_id != 0)
+		return false;
+	return chan->device->dev_id == 0;
+}
+
+static int fpga_of_remove(struct platform_device *op)
+{
+	struct fpga_dev *priv = dev_get_drvdata(&op->dev);
+	struct device *this_device = priv->miscdev.this_device;
+
+	sysfs_remove_group(&this_device->kobj, &fpga_attr_group);
+	misc_deregister(&priv->miscdev);
+
+	free_irq(priv->irq, priv);
+	irq_dispose_mapping(priv->irq);
+
+	/* make sure the power supplies are off */
+	fpga_disable_power_supplies(priv);
+
+	/* unmap registers */
+	iounmap(priv->immr);
+	iounmap(priv->regs);
+
+	dma_release_channel(priv->chan);
+
+	/* drop our reference to the private data structure */
+	kref_put(&priv->ref, fpga_dev_remove);
+	return 0;
+}
+
+/* CTL-CPLD Version Register */
+#define CTL_CPLD_VERSION	0x2000
+
+static int fpga_of_probe(struct platform_device *op,
+			 const struct of_device_id *match)
+{
+	struct device_node *of_node = op->dev.of_node;
+	struct device *this_device;
+	struct fpga_dev *priv;
+	dma_cap_mask_t mask;
+	u32 ver;
+	int ret;
+
+	/* Allocate private data */
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&op->dev, "Unable to allocate private data\n");
+		ret = -ENOMEM;
+		goto out_return;
+	}
+
+	/* Setup the miscdevice */
+	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
+	priv->miscdev.name = drv_name;
+	priv->miscdev.fops = &fpga_fops;
+
+	kref_init(&priv->ref);
+
+	dev_set_drvdata(&op->dev, priv);
+	priv->dev = &op->dev;
+	mutex_init(&priv->lock);
+	init_completion(&priv->completion);
+	videobuf_dma_init(&priv->vb);
+
+	dev_set_drvdata(priv->dev, priv);
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+	dma_cap_set(DMA_INTERRUPT, mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_cap_set(DMA_SG, mask);
+
+	/* Get control of DMA channel #0 */
+	priv->chan = dma_request_channel(mask, dma_filter, NULL);
+	if (!priv->chan) {
+		dev_err(&op->dev, "Unable to acquire DMA channel #0\n");
+		ret = -ENODEV;
+		goto out_free_priv;
+	}
+
+	/* Remap the registers for use */
+	priv->regs = of_iomap(of_node, 0);
+	if (!priv->regs) {
+		dev_err(&op->dev, "Unable to ioremap registers\n");
+		ret = -ENOMEM;
+		goto out_dma_release_channel;
+	}
+
+	/* Remap the IMMR for use */
+	priv->immr = ioremap(get_immrbase(), 0x100000);
+	if (!priv->immr) {
+		dev_err(&op->dev, "Unable to ioremap IMMR\n");
+		ret = -ENOMEM;
+		goto out_unmap_regs;
+	}
+
+	/*
+	 * Check that external DMA is configured
+	 *
+	 * U-Boot does this for us, but we should check it and bail out if
+	 * there is a problem. Failing to have this register setup correctly
+	 * will cause the DMA controller to transfer a single cacheline
+	 * worth of data, then wedge itself.
+	 */
+	if ((ioread32be(priv->immr + 0x114) & 0xE00) != 0xE00) {
+		dev_err(&op->dev, "External DMA control not configured\n");
+		ret = -ENODEV;
+		goto out_unmap_immr;
+	}
+
+	/*
+	 * Check the CTL-CPLD version
+	 *
+	 * This driver uses the CTL-CPLD DATA-FPGA power sequencer, and we
+	 * don't want to run on any version of the CTL-CPLD that does not use
+	 * a compatible register layout.
+	 *
+	 * v2: changed register layout, added power sequencer
+	 * v3: added glitch filter on the i2c overcurrent/overtemp outputs
+	 */
+	ver = ioread8(priv->regs + CTL_CPLD_VERSION);
+	if (ver != 0x02 && ver != 0x03) {
+		dev_err(&op->dev, "CTL-CPLD is not version 0x02 or 0x03!\n");
+		ret = -ENODEV;
+		goto out_unmap_immr;
+	}
+
+	/* Set the exact size that the firmware image should be */
+	ver = ioread32be(priv->regs + SYS_REG_VERSION);
+	priv->fw_size = (ver & (1 << 18)) ? FW_SIZE_EP2S130 : FW_SIZE_EP2S90;
+
+	/* Find the correct IRQ number */
+	priv->irq = irq_of_parse_and_map(of_node, 0);
+	if (priv->irq == NO_IRQ) {
+		dev_err(&op->dev, "Unable to find IRQ line\n");
+		ret = -ENODEV;
+		goto out_unmap_immr;
+	}
+
+	/* Request the IRQ */
+	ret = request_irq(priv->irq, fpga_irq, IRQF_SHARED, drv_name, priv);
+	if (ret) {
+		dev_err(&op->dev, "Unable to request IRQ %d\n", priv->irq);
+		ret = -ENODEV;
+		goto out_irq_dispose_mapping;
+	}
+
+	/* Reset and stop the FPGA's, just in case */
+	fpga_do_stop(priv);
+
+	/* Register the miscdevice */
+	ret = misc_register(&priv->miscdev);
+	if (ret) {
+		dev_err(&op->dev, "Unable to register miscdevice\n");
+		goto out_free_irq;
+	}
+
+	/* Create the sysfs files */
+	this_device = priv->miscdev.this_device;
+	dev_set_drvdata(this_device, priv);
+	ret = sysfs_create_group(&this_device->kobj, &fpga_attr_group);
+	if (ret) {
+		dev_err(&op->dev, "Unable to create sysfs files\n");
+		goto out_misc_deregister;
+	}
+
+	dev_info(priv->dev, "CARMA FPGA Programmer: %s rev%s with %s FPGAs\n",
+			(ver & (1 << 17)) ? "Correlator" : "Digitizer",
+			(ver & (1 << 16)) ? "B" : "A",
+			(ver & (1 << 18)) ? "EP2S130" : "EP2S90");
+
+	return 0;
+
+out_misc_deregister:
+	misc_deregister(&priv->miscdev);
+out_free_irq:
+	free_irq(priv->irq, priv);
+out_irq_dispose_mapping:
+	irq_dispose_mapping(priv->irq);
+out_unmap_immr:
+	iounmap(priv->immr);
+out_unmap_regs:
+	iounmap(priv->regs);
+out_dma_release_channel:
+	dma_release_channel(priv->chan);
+out_free_priv:
+	kref_put(&priv->ref, fpga_dev_remove);
+out_return:
+	return ret;
+}
+
+static struct of_device_id fpga_of_match[] = {
+	{ .compatible = "carma,fpga-programmer", },
+	{},
+};
+
+static struct of_platform_driver fpga_of_driver = {
+	.probe		= fpga_of_probe,
+	.remove		= fpga_of_remove,
+	.driver		= {
+		.name		= drv_name,
+		.of_match_table	= fpga_of_match,
+		.owner		= THIS_MODULE,
+	},
+};
+
+/*
+ * Module Init / Exit
+ */
+
+static int __init fpga_init(void)
+{
+	led_trigger_register_simple("fpga", &ledtrig_fpga);
+	return of_register_platform_driver(&fpga_of_driver);
+}
+
+static void __exit fpga_exit(void)
+{
+	of_unregister_platform_driver(&fpga_of_driver);
+	led_trigger_unregister_simple(ledtrig_fpga);
+}
+
+MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
+MODULE_DESCRIPTION("CARMA Board DATA-FPGA Programmer");
+MODULE_LICENSE("GPL");
+
+module_init(fpga_init);
+module_exit(fpga_exit);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
new file mode 100644
index 000000000000..3965821fef17
--- /dev/null
+++ b/drivers/misc/carma/carma-fpga.c
@@ -0,0 +1,1433 @@
+/*
+ * CARMA DATA-FPGA Access Driver
+ *
+ * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/*
+ * FPGA Memory Dump Format
+ *
+ * FPGA #0 control registers (32 x 32-bit words)
+ * FPGA #1 control registers (32 x 32-bit words)
+ * FPGA #2 control registers (32 x 32-bit words)
+ * FPGA #3 control registers (32 x 32-bit words)
+ * SYSFPGA control registers (32 x 32-bit words)
+ * FPGA #0 correlation array (NUM_CORL0 correlation blocks)
+ * FPGA #1 correlation array (NUM_CORL1 correlation blocks)
+ * FPGA #2 correlation array (NUM_CORL2 correlation blocks)
+ * FPGA #3 correlation array (NUM_CORL3 correlation blocks)
+ *
+ * Each correlation array consists of:
+ *
+ * Correlation Data      (2 x NUM_LAGSn x 32-bit words)
+ * Pipeline Metadata     (2 x NUM_METAn x 32-bit words)
+ * Quantization Counters (2 x NUM_QCNTn x 32-bit words)
+ *
+ * The NUM_CORLn, NUM_LAGSn, NUM_METAn, and NUM_QCNTn values come from
+ * the FPGA configuration registers. They do not change once the FPGAs
+ * have been programmed; they only change on re-programming.
+ */
+
+/*
+ * Basic Description:
+ *
+ * This driver is used to capture correlation spectra off of the four data
+ * processing FPGAs. The FPGAs are often reprogrammed at runtime, therefore
+ * this driver supports dynamic enable/disable of capture while the device
+ * remains open.
+ *
+ * The nominal capture rate is 64Hz (every 15.625ms). To facilitate this fast
+ * capture rate, all buffers are pre-allocated to avoid any potentially long
+ * running memory allocations while capturing.
+ *
+ * There are two lists and one pointer which are used to keep track of the
+ * different states of data buffers.
+ *
+ * 1) free list
+ * This list holds all empty data buffers which are ready to receive data.
+ *
+ * 2) inflight pointer
+ * This pointer holds the currently inflight data buffer. This buffer is having
+ * data copied into it by the DMA engine.
+ *
+ * 3) used list
+ * This list holds data buffers which have been filled, and are waiting to be
+ * read by userspace.
+ *
+ * All buffers start life on the free list, then move successively to the
+ * inflight pointer, and then to the used list. After they have been read by
+ * userspace, they are moved back to the free list. The cycle repeats as long
+ * as necessary.
+ *
+ * It should be noted that all buffers are mapped and ready for DMA when they
+ * are on either list or held by the inflight pointer. They are only unmapped
+ * when they are in the process of being read by userspace.
+ */
+
+/*
+ * Notes on the IRQ masking scheme:
+ *
+ * The IRQ masking scheme here is different than most other hardware. The only
+ * way for the DATA-FPGAs to detect if the kernel has taken too long to copy
+ * the data is if the status registers are not cleared before the next
+ * correlation data dump is ready.
+ *
+ * The interrupt line is connected to the status registers, such that when they
+ * are cleared, the interrupt is de-asserted. Therein lies our problem. We need
+ * to schedule a long-running DMA operation and return from the interrupt
+ * handler quickly, but we cannot clear the status registers.
+ *
+ * To handle this, the system controller FPGA has the capability to connect the
+ * interrupt line to a user-controlled GPIO pin. This pin is driven high
+ * (unasserted) and left that way. To mask the interrupt, we change the
+ * interrupt source to the GPIO pin. Tada, we hid the interrupt. :)
+ */
+
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/seq_file.h>
+#include <linux/highmem.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kref.h>
+#include <linux/io.h>
+
+#include <media/videobuf-dma-sg.h>
+
+/* system controller registers */
+#define SYS_IRQ_SOURCE_CTL	0x24	/* written with 0x2F (GPIO) or 0x3F (FPGA) */
+#define SYS_IRQ_OUTPUT_EN	0x28
+#define SYS_IRQ_OUTPUT_DATA	0x2C
+#define SYS_IRQ_INPUT_DATA	0x30	/* raw IRQ pin readback */
+#define SYS_FPGA_CONFIG_STATUS	0x44	/* bit 18 is checked before enabling */
+
+/* GPIO IRQ line assignment */
+#define IRQ_CORL_DONE		0x10
+
+/* FPGA registers */
+#define MMAP_REG_VERSION	0x00
+#define MMAP_REG_CORL_CONF1	0x08
+#define MMAP_REG_CORL_CONF2	0x0C
+#define MMAP_REG_STATUS		0x48	/* holds CORL_DONE / CORL_ERR, cleared by writing 0 */
+
+#define SYS_FPGA_BLOCK		0xF0000000	/* SYS-FPGA registers, used as a DMA source address */
+
+#define DATA_FPGA_START		0x400000	/* offset of the first DATA-FPGA window */
+#define DATA_FPGA_SIZE		0x80000		/* size of each DATA-FPGA window */
+
+static const char drv_name[] = "carma-fpga";
+
+#define NUM_FPGA	4	/* number of DATA-FPGAs */
+
+#define MIN_DATA_BUFS	8	/* enabling fails with fewer buffers than this */
+#define MAX_DATA_BUFS	64	/* number of buffers requested on enable */
+
+struct fpga_info {
+	unsigned int num_lag_ram;	/* correlation blocks dumped from this FPGA */
+	unsigned int blk_size;		/* size of each of those blocks, in bytes */
+};
+
+struct data_buf {
+	struct list_head entry;		/* link on the free or used list */
+	struct videobuf_dmabuf vb;	/* backing memory and its DMA mapping */
+	size_t size;			/* bytes requested when the buffer was allocated */
+};
+
+struct fpga_device {
+	/* character device */
+	struct miscdevice miscdev;
+	struct device *dev;
+	struct mutex mutex;	/* held around enable/disable (see LOCKING notes) */
+
+	/* reference count */
+	struct kref ref;	/* taken by every open file, see data_open() */
+
+	/* FPGA registers and information */
+	struct fpga_info info[NUM_FPGA];
+	void __iomem *regs;
+	int irq;
+
+	/* FPGA Physical Address/Size Information */
+	resource_size_t phys_addr;
+	size_t phys_size;
+
+	/* DMA structures */
+	struct sg_table corl_table;	/* source scatterlist for one dump */
+	unsigned int corl_nents;	/* entries allocated in corl_table */
+	struct dma_chan *chan;
+
+	/* Protection for all members below */
+	spinlock_t lock;
+
+	/* Device enable/disable flag */
+	bool enabled;
+
+	/* Correlation data buffers */
+	wait_queue_head_t wait;		/* woken by the DMA completion callback */
+	struct list_head free;		/* empty buffers ready to receive data */
+	struct list_head used;		/* filled buffers waiting for read() */
+	struct data_buf *inflight;	/* buffer being filled by DMA, or NULL */
+
+	/* Information about data buffers */
+	unsigned int num_dropped;	/* dumps dropped because no buffer was free */
+	unsigned int num_buffers;	/* buffers currently allocated */
+	size_t bufsize;			/* size of one dump, in bytes */
+	struct dentry *dbg_entry;	/* debugfs file, see data_debugfs_init() */
+};
+
+struct fpga_reader {
+	struct fpga_device *priv;	/* device this open file belongs to */
+	struct data_buf *buf;		/* partially read buffer, or NULL */
+	off_t buf_start;		/* bytes of buf already copied to userspace */
+};
+
+static void fpga_device_release(struct kref *ref)
+{
+	/* called on the final kref_put(): recover the device and free it */
+	struct fpga_device *fdev = container_of(ref, struct fpga_device, ref);
+
+	mutex_destroy(&fdev->mutex);
+	kfree(fdev);
+}
+
+/*
+ * Data Buffer Allocation Helpers
+ */
+
+/**
+ * data_free_buffer() - free a single data buffer and all allocated memory
+ * @buf: the buffer to free
+ *
+ * This will free all of the pages allocated to the given data buffer, and
+ * then free the structure itself
+ */
+static void data_free_buffer(struct data_buf *buf)
+{
+	/* NULL is accepted so that callers may free unconditionally */
+	if (buf != NULL) {
+		/* release the videobuf memory ... */
+		videobuf_dma_free(&buf->vb);
+		/* ... and then the descriptor that owned it */
+		kfree(buf);
+	}
+}
+
+/**
+ * data_alloc_buffer() - allocate and fill a data buffer with pages
+ * @bytes: the number of bytes required
+ *
+ * This allocates all space needed for a data buffer. It must be mapped before
+ * use in a DMA transaction using videobuf_dma_map().
+ *
+ * Returns NULL on failure
+ */
+static struct data_buf *data_alloc_buffer(const size_t bytes)
+{
+	/* the videobuf allocator works in whole pages: round the size up */
+	const unsigned int pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+	struct data_buf *dbuf;
+
+	/*
+	 * Start from a zeroed descriptor. Nothing else has been set up
+	 * yet, so a failure here needs no cleanup.
+	 */
+	dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL);
+	if (dbuf == NULL)
+		return NULL;
+
+	/* the buffer is on no list yet; record the size that was asked for */
+	INIT_LIST_HEAD(&dbuf->entry);
+	dbuf->size = bytes;
+
+	/*
+	 * Back the descriptor with kernel memory that the device will
+	 * fill. Mapping it for DMA is left to the caller.
+	 */
+	videobuf_dma_init(&dbuf->vb);
+	if (videobuf_dma_init_kernel(&dbuf->vb, DMA_FROM_DEVICE, pages)) {
+		/* no memory available: give the descriptor back */
+		kfree(dbuf);
+		return NULL;
+	}
+
+	return dbuf;
+}
+
+/**
+ * data_free_buffers() - free all allocated buffers
+ * @priv: the driver's private data structure
+ *
+ * Free all buffers allocated by the driver (except those currently in the
+ * process of being read by userspace).
+ *
+ * LOCKING: must hold dev->mutex
+ * CONTEXT: user
+ */
+static void data_free_buffers(struct fpga_device *priv)
+{
+	struct list_head *lists[] = { &priv->free, &priv->used };
+	struct data_buf *cur, *next;
+	unsigned int i;
+
+	/* a buffer still owned by the DMA engine must never be freed here */
+	BUG_ON(priv->inflight != NULL);
+
+	/* drain the free list first, then the used list */
+	for (i = 0; i < ARRAY_SIZE(lists); i++) {
+		list_for_each_entry_safe(cur, next, lists[i], entry) {
+			/* unlink, undo the DMA mapping, release the memory */
+			list_del_init(&cur->entry);
+			videobuf_dma_unmap(priv->dev, &cur->vb);
+			data_free_buffer(cur);
+		}
+	}
+
+	priv->num_buffers = 0;
+	priv->bufsize = 0;
+}
+
+/**
+ * data_alloc_buffers() - allocate one second's worth of data buffers
+ * @priv: the driver's private data structure
+ *
+ * Allocate enough buffers for a whole second worth of data
+ *
+ * This routine will attempt to degrade nicely by succeeding even if a full
+ * second worth of data buffers could not be allocated, as long as a minimum
+ * number were allocated. In this case, it will print a message to the kernel
+ * log.
+ *
+ * The device must not be modifying any lists when this is called.
+ *
+ * CONTEXT: user
+ * LOCKING: must hold dev->mutex
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_alloc_buffers(struct fpga_device *priv)
+{
+	struct data_buf *dbuf;
+	int count;
+
+	/*
+	 * Try for the full complement of buffers, giving up at the first
+	 * allocation or mapping failure. Whatever was obtained up to that
+	 * point stays on the free list.
+	 */
+	for (count = 0; count < MAX_DATA_BUFS; count++) {
+		dbuf = data_alloc_buffer(priv->bufsize);
+		if (dbuf == NULL)
+			break;
+
+		/* buffers are kept mapped for DMA while they are queued */
+		if (videobuf_dma_map(priv->dev, &dbuf->vb) != 0) {
+			data_free_buffer(dbuf);
+			break;
+		}
+
+		list_add_tail(&dbuf->entry, &priv->free);
+		priv->num_buffers++;
+	}
+
+	/* below the minimum the device is unusable: undo everything */
+	if (priv->num_buffers < MIN_DATA_BUFS) {
+		dev_err(priv->dev, "Unable to allocate enough data buffers\n");
+		data_free_buffers(priv);
+		return -ENOMEM;
+	}
+
+	/* a short allocation is only worth a warning, not a failure */
+	if (priv->num_buffers < MAX_DATA_BUFS)
+		dev_warn(priv->dev,
+			 "Unable to allocate %d buffers, using %d buffers instead\n",
+			 MAX_DATA_BUFS, count);
+
+	return 0;
+}
+
+/*
+ * DMA Operations Helpers
+ */
+
+/**
+ * fpga_start_addr() - get the physical address of a DATA-FPGA
+ * @priv: the driver's private data structure
+ * @fpga: the DATA-FPGA number (zero based)
+ */
+static dma_addr_t fpga_start_addr(struct fpga_device *priv, unsigned int fpga)
+{
+	return priv->phys_addr + DATA_FPGA_START + (DATA_FPGA_SIZE * fpga);
+}
+
+/**
+ * fpga_block_addr() - get the physical address of a correlation data block
+ * @priv: the driver's private data structure
+ * @fpga: the DATA-FPGA number (zero based)
+ * @blknum: the block number (zero based); blocks are spaced 0x10000 apart
+ */
+static dma_addr_t fpga_block_addr(struct fpga_device *priv, unsigned int fpga,
+				  unsigned int blknum)
+{	/* block 0 starts 0x10000 past the start address of the DATA-FPGA */
+	return fpga_start_addr(priv, fpga) + (0x10000 * (1 + blknum));
+}
+
+#define REG_BLOCK_SIZE	(32 * 4)
+
+/**
+ * data_setup_corl_table() - create the scatterlist for correlation dumps
+ * @priv: the driver's private data structure
+ *
+ * Create the scatterlist for transferring a correlation dump from the
+ * DATA FPGAs. This structure will be reused for each buffer that needs
+ * to be filled with correlation data.
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_setup_corl_table(struct fpga_device *priv)
+{
+	struct sg_table *table = &priv->corl_table;
+	struct scatterlist *sg;
+	struct fpga_info *info;
+	int i, j, ret;
+
+	/* One entry per register block, plus one per correlation data block */
+	priv->corl_nents = 1 + NUM_FPGA;
+	for (i = 0; i < NUM_FPGA; i++)
+		priv->corl_nents += priv->info[i].num_lag_ram;
+
+	/* Allocate the scatterlist table */
+	ret = sg_alloc_table(table, priv->corl_nents, GFP_KERNEL);
+	if (ret) {
+		dev_err(priv->dev, "unable to allocate DMA table\n");
+		return ret;
+	}
+
+	/* Add the DATA FPGA registers to the scatterlist */
+	sg = table->sgl;
+	for (i = 0; i < NUM_FPGA; i++) {
+		sg_dma_address(sg) = fpga_start_addr(priv, i);
+		sg_dma_len(sg) = REG_BLOCK_SIZE;
+		sg = sg_next(sg);
+	}
+
+	/* Add the SYS-FPGA registers to the scatterlist */
+	sg_dma_address(sg) = SYS_FPGA_BLOCK;
+	sg_dma_len(sg) = REG_BLOCK_SIZE;
+	sg = sg_next(sg);
+
+	/* Add the FPGA correlation data blocks to the scatterlist */
+	for (i = 0; i < NUM_FPGA; i++) {
+		info = &priv->info[i];
+		for (j = 0; j < info->num_lag_ram; j++) {
+			sg_dma_address(sg) = fpga_block_addr(priv, i, j);
+			sg_dma_len(sg) = info->blk_size;
+			sg = sg_next(sg);
+		}
+	}
+
+	/*
+	 * All physical addresses and lengths are present in the structure
+	 * now. It can be reused for every FPGA DATA interrupt
+	 */
+	return 0;
+}
+
+/*
+ * FPGA Register Access Helpers
+ */
+
+static void fpga_write_reg(struct fpga_device *priv, unsigned int fpga,
+			   unsigned int reg, u32 val)
+{
+	const int window = (fpga * DATA_FPGA_SIZE) + DATA_FPGA_START; /* this FPGA's registers */
+	iowrite32be(val, priv->regs + window + reg);
+}
+
+static u32 fpga_read_reg(struct fpga_device *priv, unsigned int fpga,
+			 unsigned int reg)
+{
+	const int window = (fpga * DATA_FPGA_SIZE) + DATA_FPGA_START; /* this FPGA's registers */
+	return ioread32be(priv->regs + window + reg);
+}
+
+/**
+ * data_calculate_bufsize() - calculate the data buffer size required
+ * @priv: the driver's private data structure
+ *
+ * Calculate the total buffer size needed to hold a single block
+ * of correlation data
+ *
+ * CONTEXT: user
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_calculate_bufsize(struct fpga_device *priv)
+{
+	u32 num_corl, num_lags, num_meta, num_qcnt, num_pack;
+	u32 conf1, conf2, version;
+	u32 num_lag_ram, blk_size;
+	int i;
+
+	/* Each buffer starts with the 5 FPGA register areas */
+	priv->bufsize = (1 + NUM_FPGA) * REG_BLOCK_SIZE;
+
+	/* Read and store the configuration data for each FPGA */
+	for (i = 0; i < NUM_FPGA; i++) {
+		version = fpga_read_reg(priv, i, MMAP_REG_VERSION);
+		conf1 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF1);
+		conf2 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF2);
+		num_corl = (conf1 & 0x000000F0) >> 4; /* same in all versions */
+		num_qcnt = (conf2 & 0x00000FFF) >> 0; /* same in all versions */
+
+		/* minor version 2 and later */
+		if ((version & 0x000000FF) >= 2) {
+			num_pack = (conf1 & 0x00000F00) >> 8;
+			num_lags = (conf1 & 0x00FFF000) >> 12;
+			num_meta = (conf1 & 0x7F000000) >> 24;
+		} else {
+			num_pack = 1; /* implied */
+			num_lags = (conf1 & 0x000FFF00) >> 8;
+			num_meta = (conf1 & 0x7FF00000) >> 20;
+		}
+
+		/* NUM_PACK comes from hardware: never divide by a zero value */
+		if (num_pack == 0)
+			return -EINVAL;
+
+		num_lag_ram = (num_corl + num_pack - 1) / num_pack;
+		blk_size = ((num_pack * num_lags) + num_meta + num_qcnt) * 8;
+		priv->info[i].num_lag_ram = num_lag_ram;
+		priv->info[i].blk_size = blk_size;
+		priv->bufsize += num_lag_ram * blk_size;
+
+		dev_dbg(priv->dev, "FPGA %d NUM_CORL: %d\n", i, num_corl);
+		dev_dbg(priv->dev, "FPGA %d NUM_PACK: %d\n", i, num_pack);
+		dev_dbg(priv->dev, "FPGA %d NUM_LAGS: %d\n", i, num_lags);
+		dev_dbg(priv->dev, "FPGA %d NUM_META: %d\n", i, num_meta);
+		dev_dbg(priv->dev, "FPGA %d NUM_QCNT: %d\n", i, num_qcnt);
+		dev_dbg(priv->dev, "FPGA %d BLK_SIZE: %d\n", i, blk_size);
+	}
+	dev_dbg(priv->dev, "TOTAL BUFFER SIZE: %zu bytes\n", priv->bufsize);
+	return 0;
+}
+
+/*
+ * Interrupt Handling
+ */
+
+/**
+ * data_disable_interrupts() - stop the device from generating interrupts
+ * @priv: the driver's private data structure
+ *
+ * Hide interrupts by switching to GPIO interrupt source
+ *
+ * LOCKING: must hold dev->lock
+ */
+static void data_disable_interrupts(struct fpga_device *priv)
+{
+	/* 0x2F selects the GPIO pin as IRQ source (0x3F selects the FPGAs) */
+	iowrite32be(0x2F, priv->regs + SYS_IRQ_SOURCE_CTL);
+}
+
+/**
+ * data_enable_interrupts() - allow the device to generate interrupts
+ * @priv: the driver's private data structure
+ *
+ * Unhide interrupts by switching to the FPGA interrupt source. At the
+ * same time, clear the DATA-FPGA status registers.
+ *
+ * LOCKING: must hold dev->lock
+ */
+static void data_enable_interrupts(struct fpga_device *priv)
+{
+	unsigned int fpga;
+
+	/* clear the actual FPGA corl_done interrupt on every DATA-FPGA */
+	for (fpga = 0; fpga < NUM_FPGA; fpga++)
+		fpga_write_reg(priv, fpga, MMAP_REG_STATUS, 0x0);
+
+	/* read one status register back to flush the writes */
+	fpga_read_reg(priv, 0, MMAP_REG_STATUS);
+
+	/* switch back to the external interrupt source */
+	iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL);
+}
+
+/**
+ * data_dma_cb() - DMAEngine callback for DMA completion
+ * @data: the driver's private data structure
+ *
+ * Complete a DMA transfer from the DATA-FPGA's
+ *
+ * This is called via the DMA callback mechanism, and will handle moving the
+ * completed DMA transaction to the used list, and then wake any processes
+ * waiting for new data
+ *
+ * CONTEXT: any, softirq expected
+ */
+static void data_dma_cb(void *data)
+{
+	struct fpga_device *priv = data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* data_irq() sets inflight before starting the DMA: NULL is a bug */
+	BUG_ON(priv->inflight == NULL);
+
+	/* Move the inflight buffer onto the used list */
+	list_move_tail(&priv->inflight->entry, &priv->used);
+	priv->inflight = NULL;
+
+	/* clear the FPGA status and re-enable interrupts */
+	data_enable_interrupts(priv);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/*
+	 * Wake data_read() and data_poll() waiters (a buffer is now on the
+	 * used list) and data_device_disable() (nothing is inflight any more)
+	 */
+	wake_up(&priv->wait);
+}
+
+/**
+ * data_submit_dma() - prepare and submit the required DMA to fill a buffer
+ * @priv: the driver's private data structure
+ * @buf: the data buffer
+ *
+ * Prepare and submit the necessary DMA transactions to fill a correlation
+ * data buffer.
+ *
+ * LOCKING: must hold dev->lock
+ * CONTEXT: hardirq only
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
+{
+	struct scatterlist *dst_sg, *src_sg;
+	unsigned int dst_nents, src_nents;
+	struct dma_chan *chan = priv->chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	dma_addr_t dst, src;
+
+	dst_sg = buf->vb.sglist;
+	dst_nents = buf->vb.sglen;
+
+	src_sg = priv->corl_table.sgl;
+	src_nents = priv->corl_nents;
+
+	/*
+	 * All buffers passed to this function should be ready and mapped
+	 * for DMA already. Therefore, we don't need to do anything except
+	 * submit it to the Freescale DMA Engine for processing
+	 */
+
+	/* setup the scatterlist to scatterlist transfer */
+	tx = chan->device->device_prep_dma_sg(chan,
+					      dst_sg, dst_nents,
+					      src_sg, src_nents,
+					      0);
+	if (!tx) {
+		dev_err(priv->dev, "unable to prep scatterlist DMA\n");
+		return -ENOMEM;
+	}
+
+	/* submit the transaction to the DMA controller */
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(priv->dev, "unable to submit scatterlist DMA\n");
+		return -ENOMEM;
+	}
+
+	/* Re-read the SYS-FPGA block into its slot after the DATA-FPGA registers */
+	dst = sg_dma_address(dst_sg) + (NUM_FPGA * REG_BLOCK_SIZE);
+	src = SYS_FPGA_BLOCK;
+	tx = chan->device->device_prep_dma_memcpy(chan, dst, src,
+						  REG_BLOCK_SIZE,
+						  DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(priv->dev, "unable to prep SYS-FPGA DMA\n");
+		return -ENOMEM;	/* NOTE(review): the first transfer is already submitted here */
+	}
+
+	/* data_dma_cb() is attached to this second, final transfer only */
+	tx->callback = data_dma_cb;
+	tx->callback_param = priv;
+
+	/* submit the transaction to the DMA controller */
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(priv->dev, "unable to submit SYS-FPGA DMA\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+#define CORL_DONE	0x1
+#define CORL_ERR	0x2
+
+static irqreturn_t data_irq(int irq, void *dev_id)
+{
+	struct fpga_device *priv = dev_id;
+	bool submitted = false;
+	struct data_buf *buf;
+	u32 status;
+	int i;
+
+	/* spurious unless every DATA-FPGA reports CORL_DONE or CORL_ERR */
+	for (i = 0; i < NUM_FPGA; i++) {
+		status = fpga_read_reg(priv, i, MMAP_REG_STATUS);
+		if (!(status & (CORL_DONE | CORL_ERR))) {
+			dev_err(priv->dev, "spurious irq detected (FPGA)\n");
+			return IRQ_NONE;
+		}
+	}
+
+	/* detect spurious interrupts via raw IRQ pin readback */
+	status = ioread32be(priv->regs + SYS_IRQ_INPUT_DATA);
+	if (status & IRQ_CORL_DONE) {
+		dev_err(priv->dev, "spurious irq detected (IRQ)\n");
+		return IRQ_NONE;
+	}
+
+	spin_lock(&priv->lock);
+
+	/* hide the interrupt by switching the IRQ driver to GPIO */
+	data_disable_interrupts(priv);
+
+	/* If there are no free buffers, drop this data */
+	if (list_empty(&priv->free)) {
+		priv->num_dropped++;
+		goto out;
+	}
+
+	buf = list_first_entry(&priv->free, struct data_buf, entry);
+	list_del_init(&buf->entry);
+	BUG_ON(buf->size != priv->bufsize);
+
+	/* Submit a DMA transfer to get the correlation data */
+	if (data_submit_dma(priv, buf)) {
+		dev_err(priv->dev, "Unable to setup DMA transfer\n");
+		list_move_tail(&buf->entry, &priv->free);
+		goto out;
+	}
+
+	/* Save the buffer for the DMA callback */
+	priv->inflight = buf;
+	submitted = true;
+
+	/* Start the DMA Engine */
+	dma_async_memcpy_issue_pending(priv->chan);
+
+out:
+	/* If no DMA was submitted, re-enable interrupts */
+	if (!submitted)
+		data_enable_interrupts(priv);
+
+	spin_unlock(&priv->lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Realtime Device Enable Helpers
+ */
+
+/**
+ * data_device_enable() - enable the device for buffered dumping
+ * @priv: the driver's private data structure
+ *
+ * Enable the device for buffered dumping. Allocates buffers and hooks up
+ * the interrupt handler. When this finishes, data will come pouring in.
+ *
+ * LOCKING: must hold dev->mutex
+ * CONTEXT: user context only
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_device_enable(struct fpga_device *priv)
+{
+	u32 val;
+	int ret;
+
+	/* multiple enables are safe: they do nothing */
+	if (priv->enabled)
+		return 0;
+
+	/* check that the FPGAs are programmed */
+	val = ioread32be(priv->regs + SYS_FPGA_CONFIG_STATUS);
+	if (!(val & (1 << 18))) {
+		dev_err(priv->dev, "DATA-FPGAs are not enabled\n");
+		return -ENODATA;	/* nothing has been set up yet */
+	}
+
+	/* read the FPGAs to calculate the buffer size */
+	ret = data_calculate_bufsize(priv);
+	if (ret) {
+		dev_err(priv->dev, "unable to calculate buffer size\n");
+		goto out_error;
+	}
+
+	/* allocate the correlation data buffers */
+	ret = data_alloc_buffers(priv);
+	if (ret) {
+		dev_err(priv->dev, "unable to allocate buffers\n");
+		goto out_error;
+	}
+
+	/* setup the source scatterlist for dumping correlation data */
+	ret = data_setup_corl_table(priv);
+	if (ret) {
+		dev_err(priv->dev, "unable to setup correlation DMA table\n");
+		goto out_error;
+	}
+
+	/* hookup the irq handler */
+	ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv);
+	if (ret) {
+		dev_err(priv->dev, "unable to request IRQ handler\n");
+		goto out_error;
+	}
+
+	/* switch to the external FPGA IRQ line */
+	data_enable_interrupts(priv);
+
+	/* success, we're enabled (set after interrupts are already unmasked) */
+	priv->enabled = true;
+	return 0;
+
+out_error:
+	/* NOTE(review): also reached before the table is allocated - confirm sg_free_table() copes */
+	sg_free_table(&priv->corl_table);
+	priv->corl_nents = 0;
+
+	data_free_buffers(priv);
+	return ret;
+}
+
+/**
+ * data_device_disable() - disable the device for buffered dumping
+ * @priv: the driver's private data structure
+ *
+ * Disable the device for buffered dumping. Stops new DMA transactions from
+ * being generated, waits for all outstanding DMA to complete, and then frees
+ * all buffers.
+ *
+ * LOCKING: must hold dev->mutex
+ * CONTEXT: user only
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_device_disable(struct fpga_device *priv)
+{
+	/* allow multiple disable */
+	if (!priv->enabled)
+		return 0;
+
+	/* switch to the internal GPIO IRQ line */
+	data_disable_interrupts(priv);
+
+	/* unhook the irq handler */
+	free_irq(priv->irq, priv);
+
+	/*
+	 * wait for all outstanding DMA to complete
+	 *
+	 * The handler is gone, so no new buffer can be marked inflight. The
+	 * wait must not be interruptible: returning early on a signal would
+	 * leave the IRQ freed while priv->enabled is still set, so the next
+	 * disable would free the IRQ again and the next enable would do nothing.
+	 */
+	wait_event(priv->wait, priv->inflight == NULL);
+
+	/* free the correlation table */
+	sg_free_table(&priv->corl_table);
+	priv->corl_nents = 0;
+
+	/*
+	 * The DMA callback switches back to the FPGA interrupt source, so
+	 * hide the interrupt again. The spinlock is not taken to protect
+	 * priv->enabled, but to make sure that no reader is altering the
+	 * free or used lists while the flag is cleared.
+	 */
+	spin_lock_irq(&priv->lock);
+	data_disable_interrupts(priv);
+	priv->enabled = false;
+	spin_unlock_irq(&priv->lock);
+
+	/* free all buffers: the free and used lists are not being changed */
+	data_free_buffers(priv);
+	return 0;
+}
+
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Count the number of entries in the given list
+ */
+static unsigned int list_num_entries(struct list_head *list)
+{
+	struct list_head *pos = list->next;
+	unsigned int count = 0;
+
+	/* follow the links until we are back at the list head */
+	for (; pos != list; pos = pos->next)
+		count++;
+	return count;
+}
+
+static int data_debug_show(struct seq_file *f, void *offset)
+{
+	struct fpga_device *priv = f->private;
+	int ret;
+
+	/*
+	 * Lock the mutex first, so that we get an accurate value for enable
+	 * Lock the spinlock next, to get accurate list counts
+	 */
+	ret = mutex_lock_interruptible(&priv->mutex);
+	if (ret)
+		return ret;
+
+	spin_lock_irq(&priv->lock);
+
+	seq_printf(f, "enabled: %d\n", priv->enabled);
+	seq_printf(f, "bufsize: %zu\n", priv->bufsize);
+	seq_printf(f, "num_buffers: %u\n", priv->num_buffers);
+	seq_printf(f, "num_free: %u\n", list_num_entries(&priv->free));
+	seq_printf(f, "inflight: %d\n", priv->inflight != NULL);
+	seq_printf(f, "num_used: %u\n", list_num_entries(&priv->used));
+	seq_printf(f, "num_dropped: %u\n", priv->num_dropped);
+
+	spin_unlock_irq(&priv->lock);
+	mutex_unlock(&priv->mutex);
+	return 0;
+}
+
+static int data_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, data_debug_show, inode->i_private); /* i_private is handed to data_debug_show() as f->private */
+}
+
+static const struct file_operations data_debug_fops = {
+	.owner		= THIS_MODULE,
+	.open		= data_debug_open,	/* binds data_debug_show() via single_open() */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() */
+};
+
+static int data_debugfs_init(struct fpga_device *priv)
+{
+	priv->dbg_entry = debugfs_create_file(drv_name, S_IRUGO, NULL, priv,
+					      &data_debug_fops);
+	if (IS_ERR(priv->dbg_entry))
+		return PTR_ERR(priv->dbg_entry);
+	/* NOTE(review): a NULL return is not treated as a failure - confirm intended */
+	return 0;
+}
+
+static void data_debugfs_exit(struct fpga_device *priv)
+{
+	debugfs_remove(priv->dbg_entry);	/* entry created by data_debugfs_init() */
+}
+
+#else
+
+static inline int data_debugfs_init(struct fpga_device *priv)
+{
+	return 0;	/* stub used when CONFIG_DEBUG_FS is not set */
+}
+
+static inline void data_debugfs_exit(struct fpga_device *priv)
+{	/* stub used when CONFIG_DEBUG_FS is not set: nothing to remove */
+}
+
+#endif	/* CONFIG_DEBUG_FS */
+
+/*
+ * SYSFS Attributes
+ */
+
+static ssize_t data_en_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{	/* sysfs "enable" read handler: prints priv->enabled followed by a newline */
+	struct fpga_device *priv = dev_get_drvdata(dev);
+	return snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
+}
+
+static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct fpga_device *fdev = dev_get_drvdata(dev);
+	unsigned long want_enable;
+	ssize_t status;
+
+	/* base 0: the number may be written in decimal, octal or hex */
+	if (strict_strtoul(buf, 0, &want_enable) != 0) {
+		dev_err(fdev->dev, "unable to parse enable input\n");
+		return -EINVAL;
+	}
+
+	/* enable and disable both require the device mutex */
+	status = mutex_lock_interruptible(&fdev->mutex);
+	if (status != 0)
+		return status;
+
+	/* zero disables capture, anything else enables it */
+	status = want_enable ? data_device_enable(fdev)
+			     : data_device_disable(fdev);
+
+	if (status == 0) {
+		/* success: report the whole write as consumed */
+		status = count;
+	} else {
+		dev_err(fdev->dev, "device %s failed\n",
+			want_enable ? "enable" : "disable");
+	}
+
+	mutex_unlock(&fdev->mutex);
+	return status;
+}
+
+static DEVICE_ATTR(enable, S_IWUSR | S_IRUGO, data_en_show, data_en_set);
+
+static struct attribute *data_sysfs_attrs[] = {
+	&dev_attr_enable.attr,	/* the "enable" file declared above */
+	NULL,			/* end of the attribute list */
+};
+
+static const struct attribute_group rt_sysfs_attr_group = {
+	.attrs = data_sysfs_attrs,
+};
+
+/*
+ * FPGA Realtime Data Character Device
+ */
+
+static int data_open(struct inode *inode, struct file *filp)
+{
+	struct fpga_reader *reader;
+	struct fpga_device *fdev;
+	int err;
+
+	/*
+	 * On entry filp->private_data holds our struct miscdevice, placed
+	 * there by the miscdevice layer. Recover the driver's private data
+	 * from it before the field is reused for the per-open state.
+	 */
+	fdev = container_of(filp->private_data, struct fpga_device, miscdev);
+
+	/* per-open state: kzalloc() leaves reader->buf NULL */
+	reader = kzalloc(sizeof(*reader), GFP_KERNEL);
+	if (reader == NULL)
+		return -ENOMEM;
+
+	reader->priv = fdev;
+	filp->private_data = reader;
+
+	/* the device is a stream: mark the file as not seekable */
+	err = nonseekable_open(inode, filp);
+	if (err != 0) {
+		dev_err(fdev->dev, "nonseekable-open failed\n");
+		kfree(reader);
+		return err;
+	}
+
+	/*
+	 * Pin the private data so that it survives an unbind of the device
+	 */
+	kref_get(&fdev->ref);
+	return 0;
+}
+
+static int data_release(struct inode *inode, struct file *filp)
+{
+	struct fpga_reader *rd = filp->private_data;
+	struct fpga_device *fdev = rd->priv;
+
+	/* drop a partially read buffer (if any) along with the open state */
+	data_free_buffer(rd->buf);
+	kfree(rd);
+	filp->private_data = NULL;
+
+	/* give back the reference taken in data_open() */
+	kref_put(&fdev->ref, fpga_device_release);
+	return 0;
+}
+
+static ssize_t data_read(struct file *filp, char __user *ubuf, size_t count,
+			 loff_t *f_pos)
+{
+	struct fpga_reader *reader = filp->private_data;
+	struct fpga_device *priv = reader->priv;
+	struct list_head *used = &priv->used;
+	bool drop_buffer = false;
+	struct data_buf *dbuf;
+	size_t avail;
+	void *data;
+	int ret;
+
+	/* check if we already have a partial buffer */
+	if (reader->buf) {
+		dbuf = reader->buf;
+		goto have_buffer;
+	}
+
+	spin_lock_irq(&priv->lock);
+
+	/* Block until there is at least one buffer on the used list */
+	while (list_empty(used)) {
+		spin_unlock_irq(&priv->lock);
+
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(priv->wait, !list_empty(used));
+		if (ret)
+			return ret;
+
+		spin_lock_irq(&priv->lock);
+	}
+
+	/* Grab the first buffer off of the used list */
+	dbuf = list_first_entry(used, struct data_buf, entry);
+	list_del_init(&dbuf->entry);
+
+	spin_unlock_irq(&priv->lock);
+
+	/* Buffers are always mapped: unmap it */
+	videobuf_dma_unmap(priv->dev, &dbuf->vb);
+
+	/* save the buffer for later */
+	reader->buf = dbuf;
+	reader->buf_start = 0;
+
+have_buffer:
+	/* Get the number of bytes available */
+	avail = dbuf->size - reader->buf_start;
+	data = dbuf->vb.vaddr + reader->buf_start;
+
+	/* Get the number of bytes we can transfer */
+	count = min(count, avail);
+
+	/* Copy the data to the userspace buffer */
+	if (copy_to_user(ubuf, data, count))
+		return -EFAULT;
+
+	/* Update the amount of available space */
+	avail -= count;
+
+	/*
+	 * If there is still some data available, keep the buffer (it is
+	 * already saved in reader->buf) for the next read() and return
+	 */
+	if (avail > 0) {
+		reader->buf_start += count;
+		return count;
+	}
+
+	/*
+	 * Get the buffer ready to be reused for DMA
+	 *
+	 * If it fails, we pretend that the read never happened and return
+	 * -EFAULT to userspace. The read will be retried.
+	 */
+	ret = videobuf_dma_map(priv->dev, &dbuf->vb);
+	if (ret) {
+		dev_err(priv->dev, "unable to remap buffer for DMA\n");
+		return -EFAULT;
+	}
+
+	/* Lock against concurrent enable/disable */
+	spin_lock_irq(&priv->lock);
+
+	/* the reader is finished with this buffer */
+	reader->buf = NULL;
+
+	/*
+	 * If the device was disabled or reconfigured underneath us, throw
+	 * the buffer away. Only the decision is taken under the spinlock:
+	 * the memory must not be released with interrupts disabled.
+	 */
+	if (!priv->enabled || dbuf->size != priv->bufsize)
+		drop_buffer = true;
+	else
+		list_add_tail(&dbuf->entry, &priv->free);
+
+	spin_unlock_irq(&priv->lock);
+
+	if (drop_buffer) {
+		videobuf_dma_unmap(priv->dev, &dbuf->vb);
+		data_free_buffer(dbuf);
+	}
+	return count;
+}
+
+static unsigned int data_poll(struct file *filp, struct poll_table_struct *tbl)
+{
+	struct fpga_reader *rd = filp->private_data;
+	struct fpga_device *fdev = rd->priv;
+
+	/* register on the queue woken by the DMA completion callback */
+	poll_wait(filp, &fdev->wait, tbl);
+
+	/* readable as soon as one filled buffer is waiting */
+	if (list_empty(&fdev->used))
+		return 0;
+	return POLLIN | POLLRDNORM;
+}
+
+/* mmap() a window of the FPGA physical region as uncached IO memory */
+static int data_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct fpga_reader *reader = filp->private_data;
+	struct fpga_device *priv = reader->priv;
+	unsigned long offset, vsize, addr;
+
+	/* VMA properties */
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	vsize = vma->vm_end - vma->vm_start;
+	addr = (priv->phys_addr + offset) >> PAGE_SHIFT;
+
+	/* check offset first: phys_size - offset wraps if offset is too big */
+	if (offset > priv->phys_size || vsize > priv->phys_size - offset) {
+		dev_err(priv->dev, "requested mmap mapping too large\n");
+		return -EINVAL;
+	}
+
+	/* IO memory (stop caching) */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start, addr, vsize,
+				  vma->vm_page_prot);
+}
+
+static const struct file_operations data_fops = {
+	.owner		= THIS_MODULE,
+	.open		= data_open,
+	.release	= data_release,
+	.read		= data_read,
+	.poll		= data_poll,	/* readable when used list non-empty */
+	.mmap		= data_mmap,	/* uncached map of the FPGA region */
+	.llseek		= no_llseek,
+};
+
+/*
+ * OpenFirmware Device Subsystem
+ */
+
+/*
+ * dma_request_channel() filter: accept every channel except channel #0 of
+ * DMA device #0, which is used for the FPGA Programmer.
+ *
+ * Matching on these ids probably won't survive an unload/load cycle of the
+ * Freescale DMAEngine driver, but that won't be a problem.
+ */
+static bool dma_filter(struct dma_chan *chan, void *data)
+{
+	bool reserved = !chan->chan_id && !chan->device->dev_id;
+
+	return !reserved;
+}
+
+/* Probe: map registers, get DMA channel and IRQ, register the misc device */
+static int data_of_probe(struct platform_device *op,
+			 const struct of_device_id *match)
+{
+	struct device_node *of_node = op->dev.of_node;
+	struct device *this_device;
+	struct fpga_device *priv;
+	struct resource res;
+	dma_cap_mask_t mask;
+	int ret;
+
+	/* Allocate private data */
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&op->dev, "Unable to allocate device private data\n");
+		ret = -ENOMEM;
+		goto out_return;
+	}
+
+	dev_set_drvdata(&op->dev, priv);
+	priv->dev = &op->dev;
+	kref_init(&priv->ref);
+	mutex_init(&priv->mutex);
+
+	spin_lock_init(&priv->lock);
+	INIT_LIST_HEAD(&priv->free);
+	INIT_LIST_HEAD(&priv->used);
+	init_waitqueue_head(&priv->wait);
+
+	/* Setup the misc device */
+	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
+	priv->miscdev.name = drv_name;
+	priv->miscdev.fops = &data_fops;
+
+	/* Get the physical address of the FPGA registers (used by mmap) */
+	ret = of_address_to_resource(of_node, 0, &res);
+	if (ret) {
+		dev_err(&op->dev, "Unable to find FPGA physical address\n");
+		ret = -ENODEV;
+		goto out_free_priv;
+	}
+
+	priv->phys_addr = res.start;
+	priv->phys_size = resource_size(&res);
+
+	/* ioremap the registers for use */
+	priv->regs = of_iomap(of_node, 0);
+	if (!priv->regs) {
+		dev_err(&op->dev, "Unable to ioremap registers\n");
+		ret = -ENOMEM;
+		goto out_free_priv;
+	}
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+	dma_cap_set(DMA_INTERRUPT, mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_cap_set(DMA_SG, mask);
+
+	/* Request a DMA channel (dma_filter rejects the programmer's one) */
+	priv->chan = dma_request_channel(mask, dma_filter, NULL);
+	if (!priv->chan) {
+		dev_err(&op->dev, "Unable to request DMA channel\n");
+		ret = -ENODEV;
+		goto out_unmap_regs;
+	}
+
+	/* Find the correct IRQ number */
+	priv->irq = irq_of_parse_and_map(of_node, 0);
+	if (priv->irq == NO_IRQ) {
+		dev_err(&op->dev, "Unable to find IRQ line\n");
+		ret = -ENODEV;
+		goto out_release_dma;
+	}
+
+	/* Drive the GPIO for FPGA IRQ high (no interrupt) */
+	iowrite32be(IRQ_CORL_DONE, priv->regs + SYS_IRQ_OUTPUT_DATA);
+
+	/* Register the miscdevice */
+	ret = misc_register(&priv->miscdev);
+	if (ret) {
+		dev_err(&op->dev, "Unable to register miscdevice\n");
+		goto out_irq_dispose_mapping;
+	}
+
+	/* Create the debugfs files */
+	ret = data_debugfs_init(priv);
+	if (ret) {
+		dev_err(&op->dev, "Unable to create debugfs files\n");
+		goto out_misc_deregister;
+	}
+
+	/* Create the sysfs files */
+	this_device = priv->miscdev.this_device;
+	dev_set_drvdata(this_device, priv);
+	ret = sysfs_create_group(&this_device->kobj, &rt_sysfs_attr_group);
+	if (ret) {
+		dev_err(&op->dev, "Unable to create sysfs files\n");
+		goto out_data_debugfs_exit;
+	}
+
+	dev_info(&op->dev, "CARMA FPGA Realtime Data Driver Loaded\n");
+	return 0;
+
+out_data_debugfs_exit:
+	data_debugfs_exit(priv);
+out_misc_deregister:
+	misc_deregister(&priv->miscdev);
+out_irq_dispose_mapping:
+	irq_dispose_mapping(priv->irq);
+out_release_dma:
+	dma_release_channel(priv->chan);
+out_unmap_regs:
+	iounmap(priv->regs);
+out_free_priv:
+	kref_put(&priv->ref, fpga_device_release);
+out_return:
+	return ret;
+}
+
+/* Unbind: stop the device, unregister it and drop the probe reference */
+static int data_of_remove(struct platform_device *op)
+{
+	struct fpga_device *fpga = dev_get_drvdata(&op->dev);
+
+	/* drop the sysfs files first: the device can no longer be re-enabled */
+	sysfs_remove_group(&fpga->miscdev.this_device->kobj,
+			   &rt_sysfs_attr_group);
+	data_debugfs_exit(fpga);
+
+	/* stop the device from generating data */
+	data_device_disable(fpga);
+
+	/* no new readers can appear once the character device is gone */
+	misc_deregister(&fpga->miscdev);
+
+	/* tear down everything that is not needed by readers */
+	irq_dispose_mapping(fpga->irq);
+	dma_release_channel(fpga->chan);
+	iounmap(fpga->regs);
+
+	/* release the reference initialised by kref_init() in probe */
+	kref_put(&fpga->ref, fpga_device_release);
+
+	return 0;
+}
+
+static struct of_device_id data_of_match[] = {
+	{ .compatible = "carma,carma-fpga", },
+	{},	/* empty terminating entry */
+};
+
+static struct of_platform_driver data_of_driver = {
+	.probe		= data_of_probe,
+	.remove		= data_of_remove,
+	.driver		= {
+		.name		= drv_name,	/* also the miscdev name */
+		.of_match_table	= data_of_match,
+		.owner		= THIS_MODULE,
+	},
+};
+
+/*
+ * Module Init / Exit: register and unregister data_of_driver
+ */
+
+static int __init data_init(void)
+{
+	return of_register_platform_driver(&data_of_driver);
+}
+
+static void __exit data_exit(void)
+{
+	of_unregister_platform_driver(&data_of_driver);
+}
+
+MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
+MODULE_DESCRIPTION("CARMA DATA-FPGA Access Driver");
+MODULE_LICENSE("GPL");
+
+module_init(data_init);
+module_exit(data_exit);
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 38657cdaf54d..c4acac74725c 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -33,6 +33,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/security.h>
+#include <linux/prefetch.h>
 #include <asm/pgtable.h>
 #include "gru.h"
 #include "grutables.h"
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 20e4e9395b61..ecafa4ba238b 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -348,15 +348,15 @@ static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
 
 static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];
 
-static void gru_noop(unsigned int irq)
+static void gru_noop(struct irq_data *d)
 {
 }
 
 static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
 	[0 ... GRU_CHIPLETS_PER_BLADE - 1] {
-		.mask		= gru_noop,
-		.unmask		= gru_noop,
-		.ack		= gru_noop
+		.irq_mask	= gru_noop,
+		.irq_unmask	= gru_noop,
+		.irq_ack	= gru_noop
 	}
 };
 
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index f8538bbd0bfa..ae16c8cb4f3e 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -28,6 +28,7 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/err.h>
+#include <linux/prefetch.h>
 #include <asm/uv/uv_hub.h>
 #include "gru.h"
 #include "grutables.h"
diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig
index 2c8c3f39710d..abb5de1afce3 100644
--- a/drivers/misc/ti-st/Kconfig
+++ b/drivers/misc/ti-st/Kconfig
@@ -5,7 +5,7 @@
 menu "Texas Instruments shared transport line discipline"
 config TI_ST
 	tristate "Shared transport core driver"
-	depends on RFKILL
+	depends on NET && GPIOLIB
 	select FW_LOADER
 	help
 	  This enables the shared transport core driver for TI
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 486117f72c9f..f91f82eabda7 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -43,13 +43,15 @@ static void add_channel_to_table(struct st_data_s *st_gdata,
 	pr_info("%s: id %d\n", __func__, new_proto->chnl_id);
 	/* list now has the channel id as index itself */
 	st_gdata->list[new_proto->chnl_id] = new_proto;
+	st_gdata->is_registered[new_proto->chnl_id] = true;
 }
 
 static void remove_channel_from_table(struct st_data_s *st_gdata,
 		struct st_proto_s *proto)
 {
 	pr_info("%s: id %d\n", __func__, proto->chnl_id);
-	st_gdata->list[proto->chnl_id] = NULL;
+/*	st_gdata->list[proto->chnl_id] = NULL; */
+	st_gdata->is_registered[proto->chnl_id] = false;
 }
 
 /*
@@ -104,7 +106,7 @@ void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata)
 
 	if (unlikely
 	    (st_gdata == NULL || st_gdata->rx_skb == NULL
-	     || st_gdata->list[chnl_id] == NULL)) {
+	     || st_gdata->is_registered[chnl_id] == false)) {
 		pr_err("chnl_id %d not registered, no data to send?",
 			   chnl_id);
 		kfree_skb(st_gdata->rx_skb);
@@ -141,14 +143,15 @@ void st_reg_complete(struct st_data_s *st_gdata, char err)
 	unsigned char i = 0;
 	pr_info(" %s ", __func__);
 	for (i = 0; i < ST_MAX_CHANNELS; i++) {
-		if (likely(st_gdata != NULL && st_gdata->list[i] != NULL &&
-			   st_gdata->list[i]->reg_complete_cb != NULL)) {
+		if (likely(st_gdata != NULL &&
+			st_gdata->is_registered[i] == true &&
+				st_gdata->list[i]->reg_complete_cb != NULL)) {
 			st_gdata->list[i]->reg_complete_cb
 				(st_gdata->list[i]->priv_data, err);
 			pr_info("protocol %d's cb sent %d\n", i, err);
 			if (err) { /* cleanup registered protocol */
 				st_gdata->protos_registered--;
-				st_gdata->list[i] = NULL;
+				st_gdata->is_registered[i] = false;
 			}
 		}
 	}
@@ -475,9 +478,9 @@ void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf)
 {
 	seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n",
 			st_gdata->protos_registered,
-			st_gdata->list[0x04] != NULL ? 'R' : 'U',
-			st_gdata->list[0x08] != NULL ? 'R' : 'U',
-			st_gdata->list[0x09] != NULL ? 'R' : 'U');
+			st_gdata->is_registered[0x04] == true ? 'R' : 'U',
+			st_gdata->is_registered[0x08] == true ? 'R' : 'U',
+			st_gdata->is_registered[0x09] == true ? 'R' : 'U');
 }
 
 /********************************************************************/
@@ -504,7 +507,7 @@ long st_register(struct st_proto_s *new_proto)
 		return -EPROTONOSUPPORT;
 	}
 
-	if (st_gdata->list[new_proto->chnl_id] != NULL) {
+	if (st_gdata->is_registered[new_proto->chnl_id] == true) {
 		pr_err("chnl_id %d already registered", new_proto->chnl_id);
 		return -EALREADY;
 	}
@@ -563,7 +566,7 @@ long st_register(struct st_proto_s *new_proto)
 		/* check for already registered once more,
 		 * since the above check is old
 		 */
-		if (st_gdata->list[new_proto->chnl_id] != NULL) {
+		if (st_gdata->is_registered[new_proto->chnl_id] == true) {
 			pr_err(" proto %d already registered ",
 				   new_proto->chnl_id);
 			return -EALREADY;
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index b4488c8f6b23..5da93ee6f6be 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -30,6 +30,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
+#include <linux/sysfs.h>
 #include <linux/tty.h>
 
 #include <linux/skbuff.h>
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 63667a8f140c..d6d62fd07ee9 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -284,6 +284,7 @@ int mmc_add_card(struct mmc_card *card)
 		type = "SD-combo";
 		if (mmc_card_blockaddr(card))
 			type = "SDHC-combo";
+		break;
 	default:
 		type = "?";
 		break;
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 2e032f0e8cf4..a6c329040140 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -832,7 +832,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 		return IRQ_HANDLED;
 	}
 
-	if (end_command)
+	if (end_command && host->cmd)
 		mmc_omap_cmd_done(host, host->cmd);
 	if (host->data != NULL) {
 		if (transfer_error)
diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c
index f9b611fc773e..60e4186a4345 100644
--- a/drivers/mmc/host/sdhci-of-core.c
+++ b/drivers/mmc/host/sdhci-of-core.c
@@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
 #endif
 }
 
+static const struct of_device_id sdhci_of_match[];
 static int __devinit sdhci_of_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct sdhci_of_data *sdhci_of_data;
 	struct sdhci_host *host;
@@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev)
 	int size;
 	int ret;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(sdhci_of_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	sdhci_of_data = ofdev->dev.of_match->data;
+	sdhci_of_data = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index a136be706347..f8b5f37007b2 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -957,6 +957,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	host->ioaddr = pci_ioremap_bar(pdev, bar);
 	if (!host->ioaddr) {
 		dev_err(&pdev->dev, "failed to remap registers\n");
+		ret = -ENOMEM;
 		goto release;
 	}
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9e15f41f87be..5d20661bc357 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1334,6 +1334,13 @@ static void sdhci_tasklet_finish(unsigned long param)
 
 	host = (struct sdhci_host*)param;
 
+        /*
+         * If this tasklet gets rescheduled while running, it will
+         * be run again afterwards but without any active request.
+         */
+	if (!host->mrq)
+		return;
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	del_timer(&host->timer);
@@ -1345,7 +1352,7 @@ static void sdhci_tasklet_finish(unsigned long param)
 	 * upon error conditions.
 	 */
 	if (!(host->flags & SDHCI_DEVICE_DEAD) &&
-		(mrq->cmd->error ||
+	    ((mrq->cmd && mrq->cmd->error) ||
 		 (mrq->data && (mrq->data->error ||
 		  (mrq->data->stop && mrq->data->stop->error))) ||
 		   (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) {
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 62d37de6de76..710339a85c84 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -728,15 +728,15 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		tmio_mmc_set_clock(host, ios->clock);
 
 	/* Power sequence - OFF -> UP -> ON */
-	if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
+	if (ios->power_mode == MMC_POWER_UP) {
+		/* power up SD bus */
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 1);
+	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
 		/* power down SD bus */
 		if (ios->power_mode == MMC_POWER_OFF && host->set_pwr)
 			host->set_pwr(host->pdev, 0);
 		tmio_mmc_clk_stop(host);
-	} else if (ios->power_mode == MMC_POWER_UP) {
-		/* power up SD bus */
-		if (host->set_pwr)
-			host->set_pwr(host->pdev, 1);
 	} else {
 		/* start bus clock */
 		tmio_mmc_clk_start(host);
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 44b1f46458ca..5069111c81cc 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -260,6 +260,13 @@ config MTD_BCM963XX
 	  Support for parsing CFE image tag and creating MTD partitions on
 	  Broadcom BCM63xx boards.
 
+config MTD_LANTIQ
+	tristate "Lantiq SoC NOR support"
+	depends on LANTIQ
+	select MTD_PARTITIONS
+	help
+	  Support for NOR flash attached to the Lantiq SoC's External Bus Unit.
+
 config MTD_DILNETPC
 	tristate "CFI Flash device mapped on DIL/Net PC"
 	depends on X86 && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 08533bd5cba7..6adf4c9b9057 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -60,3 +60,4 @@ obj-$(CONFIG_MTD_VMU)		+= vmu-flash.o
 obj-$(CONFIG_MTD_GPIO_ADDR)	+= gpio-addr-flash.o
 obj-$(CONFIG_MTD_BCM963XX)	+= bcm963xx-flash.o
 obj-$(CONFIG_MTD_LATCH_ADDR)	+= latch-addr-flash.o
+obj-$(CONFIG_MTD_LANTIQ)	+= lantiq-flash.o
diff --git a/drivers/mtd/maps/lantiq-flash.c b/drivers/mtd/maps/lantiq-flash.c
new file mode 100644
index 000000000000..a90cabd7b84d
--- /dev/null
+++ b/drivers/mtd/maps/lantiq-flash.c
@@ -0,0 +1,251 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2004 Liu Peng Infineon IFAP DC COM CPE
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/cfi.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/physmap.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_platform.h>
+
+/*
+ * The NOR flash is connected to the same external bus unit (EBU) as PCI.
+ * To make PCI work we need to enable the endianness swapping for the address
+ * written to the EBU. This endianness swapping works for PCI correctly but
+ * fails for attached NOR devices. To workaround this we need to use a complex
+ * map. The workaround involves swapping all addresses whilst probing the chip.
+ * Once probing is complete we stop swapping the addresses but swizzle the
+ * unlock addresses to ensure that access to the NOR device works correctly.
+ */
+
+enum {
+	LTQ_NOR_PROBING,	/* read16/write16 XOR each address with 2 */
+	LTQ_NOR_NORMAL		/* read16/write16 use the address as given */
+};
+
+struct ltq_mtd {
+	struct resource *res;	/* memory resource 0 of the platform device */
+	struct mtd_info *mtd;	/* MTD returned by do_map_probe() */
+	struct map_info *map;	/* kzalloc'ed map handed to do_map_probe() */
+};
+
+static char ltq_map_name[] = "ltq_nor";
+
+/* 16-bit read under ebu_lock; the address is XORed with 2 while probing */
+static map_word
+ltq_read16(struct map_info *map, unsigned long adr)
+{
+	unsigned long swz = map->map_priv_1 == LTQ_NOR_PROBING ? 2 : 0;
+	unsigned long irqflags;
+	map_word val;
+
+	spin_lock_irqsave(&ebu_lock, irqflags);
+	val.x[0] = *(u16 *)(map->virt + (adr ^ swz));
+	spin_unlock_irqrestore(&ebu_lock, irqflags);
+	return val;
+}
+
+/* 16-bit write under ebu_lock; the address is XORed with 2 while probing */
+static void
+ltq_write16(struct map_info *map, map_word d, unsigned long adr)
+{
+	unsigned long swz = map->map_priv_1 == LTQ_NOR_PROBING ? 2 : 0;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ebu_lock, irqflags);
+	*(u16 *)(map->virt + (adr ^ swz)) = d.x[0];
+	spin_unlock_irqrestore(&ebu_lock, irqflags);
+}
+
+/*
+ * The following 2 functions copy data between iomem and a cached memory
+ * section. As memcpy() makes use of pre-fetching we cannot use it here.
+ * The normal alternative of using memcpy_{to,from}io also makes use of
+ * memcpy() on MIPS so it is not applicable either. We are therefore stuck
+ * with having to use our own loop.
+ */
+/* Copy len bytes from map offset 'from' to 'to', one byte at a time */
+static void ltq_copy_from(struct map_info *map, void *to,
+			  unsigned long from, ssize_t len)
+{
+	unsigned char *src = (unsigned char *)map->virt + from;
+	unsigned char *dst = to;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ebu_lock, irqflags);
+	for (; len; len--)
+		*dst++ = *src++;
+	spin_unlock_irqrestore(&ebu_lock, irqflags);
+}
+
+/* Copy len bytes from 'from' to map offset 'to', one byte at a time */
+static void ltq_copy_to(struct map_info *map, unsigned long to,
+			const void *from, ssize_t len)
+{
+	const unsigned char *src = from;
+	unsigned char *dst = (unsigned char *)map->virt + to;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ebu_lock, irqflags);
+	for (; len; len--)
+		*dst++ = *src++;
+	spin_unlock_irqrestore(&ebu_lock, irqflags);
+}
+
+static const char const *part_probe_types[] = { "cmdlinepart", NULL };
+
+/* Probe the NOR flash on the EBU and register its MTD partitions */
+static int __init ltq_mtd_probe(struct platform_device *pdev)
+{
+	struct physmap_flash_data *ltq_mtd_data = dev_get_platdata(&pdev->dev);
+	struct ltq_mtd *ltq_mtd;
+	struct mtd_partition *parts;
+	struct resource *res;
+	struct cfi_private *cfi;
+	int nr_parts, err;
+
+	ltq_mtd = kzalloc(sizeof(struct ltq_mtd), GFP_KERNEL);
+	if (!ltq_mtd)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, ltq_mtd);
+
+	ltq_mtd->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!ltq_mtd->res) {
+		dev_err(&pdev->dev, "failed to get memory resource\n");
+		err = -ENOENT;
+		goto err_free;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, ltq_mtd->res->start,
+		resource_size(ltq_mtd->res), dev_name(&pdev->dev));
+	if (!res) {	/* not ltq_mtd->res: that one is known valid */
+		dev_err(&pdev->dev, "failed to request mem resource\n");
+		err = -EBUSY;
+		goto err_free;
+	}
+
+	ltq_mtd->map = kzalloc(sizeof(struct map_info), GFP_KERNEL);
+	if (!ltq_mtd->map) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+	ltq_mtd->map->phys = res->start;
+	ltq_mtd->map->size = resource_size(res);
+	ltq_mtd->map->virt = devm_ioremap_nocache(&pdev->dev,
+				ltq_mtd->map->phys, ltq_mtd->map->size);
+	if (!ltq_mtd->map->virt) {
+		dev_err(&pdev->dev, "failed to ioremap!\n");
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ltq_mtd->map->name = ltq_map_name;
+	ltq_mtd->map->bankwidth = 2;
+	ltq_mtd->map->read = ltq_read16;
+	ltq_mtd->map->write = ltq_write16;
+	ltq_mtd->map->copy_from = ltq_copy_from;
+	ltq_mtd->map->copy_to = ltq_copy_to;
+
+	ltq_mtd->map->map_priv_1 = LTQ_NOR_PROBING;	/* swap addresses */
+	ltq_mtd->mtd = do_map_probe("cfi_probe", ltq_mtd->map);
+	ltq_mtd->map->map_priv_1 = LTQ_NOR_NORMAL;
+	if (!ltq_mtd->mtd) {
+		dev_err(&pdev->dev, "probing failed\n");
+		err = -ENXIO;
+		goto err_free;
+	}
+
+	ltq_mtd->mtd->owner = THIS_MODULE;
+	cfi = ltq_mtd->map->fldrv_priv;
+	cfi->addr_unlock1 ^= 1;
+	cfi->addr_unlock2 ^= 1;
+
+	nr_parts = parse_mtd_partitions(ltq_mtd->mtd,
+				part_probe_types, &parts, 0);
+	if (nr_parts > 0) {
+		dev_info(&pdev->dev,
+			"using %d partitions from cmdline\n", nr_parts);
+	} else {	/* fall back to platform data, which may be absent */
+		nr_parts = ltq_mtd_data ? ltq_mtd_data->nr_parts : 0;
+		parts = ltq_mtd_data ? ltq_mtd_data->parts : NULL;
+	}
+
+	err = add_mtd_partitions(ltq_mtd->mtd, parts, nr_parts);
+	if (err) {
+		dev_err(&pdev->dev, "failed to add partitions\n");
+		goto err_destroy;
+	}
+
+	return 0;
+
+err_destroy:
+	map_destroy(ltq_mtd->mtd);
+err_free:	/* mapping is devm-managed, so no iounmap() here */
+	kfree(ltq_mtd->map);
+	kfree(ltq_mtd);
+	return err;
+}
+
+/* Unregister the partitions and free what ltq_mtd_probe() allocated */
+static int __devexit ltq_mtd_remove(struct platform_device *pdev)
+{
+	struct ltq_mtd *ltq_mtd = platform_get_drvdata(pdev);
+
+	if (!ltq_mtd)
+		return 0;
+
+	if (ltq_mtd->mtd) {
+		del_mtd_partitions(ltq_mtd->mtd);
+		map_destroy(ltq_mtd->mtd);
+	}
+	/* map->virt comes from devm_ioremap_nocache(): devres unmaps it */
+	kfree(ltq_mtd->map);
+	kfree(ltq_mtd);
+	return 0;
+}
+
+static struct platform_driver ltq_mtd_driver = {
+	.remove = __devexit_p(ltq_mtd_remove),	/* probe: see init_ltq_mtd() */
+	.driver = {
+		.name = "ltq_nor",
+		.owner = THIS_MODULE,
+	},
+};
+
+/* Module init: register the driver together with its probe routine */
+static int __init init_ltq_mtd(void)
+{
+	int ret = platform_driver_probe(&ltq_mtd_driver, ltq_mtd_probe);
+
+	if (ret)
+		pr_err("ltq_nor: error registering platform driver\n");
+	return ret;
+}
+
+/* Module exit: unregister the platform driver */
+static void __exit exit_ltq_mtd(void)
+{
+	platform_driver_unregister(&ltq_mtd_driver);
+}
+
+module_init(init_ltq_mtd);
+module_exit(exit_ltq_mtd);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_DESCRIPTION("Lantiq SoC NOR");
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index bd483f0c57e1..c1d33464aee8 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes)
 }
 #endif
 
+static struct of_device_id of_flash_match[];
 static int __devinit of_flash_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_MTD_PARTITIONS
 	const char **part_probe_types;
 #endif
+	const struct of_device_id *match;
 	struct device_node *dp = dev->dev.of_node;
 	struct resource res;
 	struct of_flash *info;
@@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev)
 	struct mtd_info **mtd_list = NULL;
 	resource_size_t res_size;
 
-	if (!dev->dev.of_match)
+	match = of_match_device(of_flash_match, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	probe_type = dev->dev.of_match->data;
+	probe_type = match->data;
 
 	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
 
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 3ffe05db4923..5d513b54a7d7 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -470,7 +471,7 @@ static int __init au1xxx_nand_init(void)
 
 #ifdef CONFIG_MIPS_PB1550
 	/* set gpio206 high */
-	au_writel(au_readl(GPIO2_DIR) & ~(1 << 6), GPIO2_DIR);
+	gpio_direction_input(206);
 
 	boot_swapboot = (au_readl(MEM_STSTAT) & (0x7 << 1)) | ((bcsr_read(BCSR_STATUS) >> 6) & 0x1);
 
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 96c0b34ba8db..657b9f4b6f9b 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -400,7 +400,7 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	doc200x_hwcontrol(mtd, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
 	doc200x_hwcontrol(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
-	/* We can't' use dev_ready here, but at least we wait for the
+	/* We can't use dev_ready here, but at least we wait for the
 	 * command to complete
 	 */
 	udelay(50);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 6c884ef1b069..19f04a34783a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2017,6 +2017,13 @@ config FTMAC100
 	  from Faraday. It is used on Faraday A320, Andes AG101 and some
 	  other ARM/NDS32 SoC's.
 
+config LANTIQ_ETOP
+	tristate "Lantiq SoC ETOP driver"
+	depends on SOC_TYPE_XWAY
+	help
+	  Support for the MII0 inside the Lantiq SoC
+
+
 source "drivers/net/fs_enet/Kconfig"
 
 source "drivers/net/octeon/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index e5a7375685ad..209fbb70619b 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -259,6 +259,7 @@ obj-$(CONFIG_MLX4_CORE) += mlx4/
 obj-$(CONFIG_ENC28J60) += enc28j60.o
 obj-$(CONFIG_ETHOC) += ethoc.o
 obj-$(CONFIG_GRETH) += greth.o
+obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
 
 obj-$(CONFIG_XTENSA_XT2000_SONIC) += xtsonic.o
 
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 82260ca70323..d7c1bfe4b6ec 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -68,6 +68,7 @@
 #include <linux/sockios.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 #include <linux/if_vlan.h>
diff --git a/drivers/net/arm/etherh.c b/drivers/net/arm/etherh.c
index e252cd595016..03e217a868d4 100644
--- a/drivers/net/arm/etherh.c
+++ b/drivers/net/arm/etherh.c
@@ -527,7 +527,7 @@ static void __init etherh_banner(void)
  * Read the ethernet address string from the on board rom.
  * This is an ascii string...
  */
-static int __init etherh_addr(char *addr, struct expansion_card *ec)
+static int __devinit etherh_addr(char *addr, struct expansion_card *ec)
 {
 	struct in_chunk_dir cd;
 	char *s;
@@ -656,7 +656,7 @@ static const struct net_device_ops etherh_netdev_ops = {
 static u32 etherh_regoffsets[16];
 static u32 etherm_regoffsets[16];
 
-static int __init
+static int __devinit
 etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
 {
 	const struct etherh_data *data = id->data;
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index ce0091eb06f5..1264d781b554 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -554,7 +554,7 @@ static unsigned long __init lance_probe1( struct net_device *dev,
 		memaddr == (unsigned short *)0xffe00000) {
 		/* PAMs card and Riebl on ST use level 5 autovector */
 		if (request_irq(IRQ_AUTO_5, lance_interrupt, IRQ_TYPE_PRIO,
-		            "PAM/Riebl-ST Ethernet", dev)) {
+		            "PAM,Riebl-ST Ethernet", dev)) {
 			printk( "Lance: request for irq %d failed\n", IRQ_AUTO_5 );
 			return 0;
 		}
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index bd1d811c204f..5fedc3375562 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev,
 }
 #endif /* CONFIG_PPC_MPC512x */
 
+static struct of_device_id mpc5xxx_can_table[];
 static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct mpc5xxx_can_data *data;
 	struct device_node *np = ofdev->dev.of_node;
 	struct net_device *dev;
@@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
 	int irq, mscan_clksrc = 0;
 	int err = -ENOMEM;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data;
+	data = match->data;
 
 	base = of_iomap(np, 0);
 	if (!base) {
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index ba763e0481e3..6a0a8fca62bc 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -41,6 +41,7 @@
 #include <linux/memory.h>
 #include <asm/kexec.h>
 #include <linux/mutex.h>
+#include <linux/prefetch.h>
 
 #include <net/ip.h>
 
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index a9388944f1d3..21abb5c01a56 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -996,8 +996,10 @@ static const struct net_device_ops fs_enet_netdev_ops = {
 #endif
 };
 
+static struct of_device_id fs_enet_match[];
 static int __devinit fs_enet_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct net_device *ndev;
 	struct fs_enet_private *fep;
 	struct fs_platform_info *fpi;
@@ -1005,14 +1007,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
 	const u8 *mac_addr;
 	int privsize, len, ret = -ENODEV;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(fs_enet_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	fpi = kzalloc(sizeof(*fpi), GFP_KERNEL);
 	if (!fpi)
 		return -ENOMEM;
 
-	if (!IS_FEC(ofdev->dev.of_match)) {
+	if (!IS_FEC(match)) {
 		data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len);
 		if (!data || len != 4)
 			goto out_free_fpi;
@@ -1047,7 +1050,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
 	fep->dev = &ofdev->dev;
 	fep->ndev = ndev;
 	fep->fpi = fpi;
-	fep->ops = ofdev->dev.of_match->data;
+	fep->ops = match->data;
 
 	ret = fep->ops->setup_data(ndev);
 	if (ret)
diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c
index 7e840d373ab3..6a2e150e75bb 100644
--- a/drivers/net/fs_enet/mii-fec.c
+++ b/drivers/net/fs_enet/mii-fec.c
@@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
 	return 0;
 }
 
+static struct of_device_id fs_enet_mdio_fec_match[];
 static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct resource res;
 	struct mii_bus *new_bus;
 	struct fec_info *fec;
 	int (*get_bus_freq)(struct device_node *);
 	int ret = -ENOMEM, clock, speed;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	get_bus_freq = ofdev->dev.of_match->data;
+	get_bus_freq = match->data;
 
 	new_bus = mdiobus_alloc();
 	if (!new_bus)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 2dce3d038188..fa01b0b03b77 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5136,11 +5136,6 @@ err_set_interrupt:
 	return err;
 }
 
-static void ring_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct ixgbe_ring, rcu));
-}
-
 /**
  * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
  * @adapter: board private structure to clear interrupt scheme on
@@ -5162,7 +5157,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
 		/* ixgbe_get_stats64() might access this ring, we must wait
 		 * a grace period before freeing it.
 		 */
-		call_rcu(&ring->rcu, ring_free_rcu);
+		kfree_rcu(ring, rcu);
 		adapter->rx_ring[i] = NULL;
 	}
 
diff --git a/drivers/net/lantiq_etop.c b/drivers/net/lantiq_etop.c
new file mode 100644
index 000000000000..45f252b7da30
--- /dev/null
+++ b/drivers/net/lantiq_etop.c
@@ -0,0 +1,805 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <linux/ethtool.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/checksum.h>
+
+#include <lantiq_soc.h>
+#include <xway_dma.h>
+#include <lantiq_platform.h>
+
+#define LTQ_ETOP_MDIO		0x11804
+#define MDIO_REQUEST		0x80000000
+#define MDIO_READ		0x40000000
+#define MDIO_ADDR_MASK		0x1f
+#define MDIO_ADDR_OFFSET	0x15
+#define MDIO_REG_MASK		0x1f
+#define MDIO_REG_OFFSET		0x10
+#define MDIO_VAL_MASK		0xffff
+
+#define PPE32_CGEN		0x800
+#define LQ_PPE32_ENET_MAC_CFG	0x1840
+
+#define LTQ_ETOP_ENETS0		0x11850
+#define LTQ_ETOP_MAC_DA0	0x1186C
+#define LTQ_ETOP_MAC_DA1	0x11870
+#define LTQ_ETOP_CFG		0x16020
+#define LTQ_ETOP_IGPLEN		0x16080
+
+#define MAX_DMA_CHAN		0x8
+#define MAX_DMA_CRC_LEN		0x4
+#define MAX_DMA_DATA_LEN	0x600
+
+#define ETOP_FTCU		BIT(28)
+#define ETOP_MII_MASK		0xf
+#define ETOP_MII_NORMAL		0xd
+#define ETOP_MII_REVERSE	0xe
+#define ETOP_PLEN_UNDER		0x40
+#define ETOP_CGEN		0x800
+
+/* use 2 static channels for TX/RX; IS_TX()/IS_RX() test a channel number */
+#define LTQ_ETOP_TX_CHANNEL	1
+#define LTQ_ETOP_RX_CHANNEL	6
+#define IS_TX(x)		((x) == LTQ_ETOP_TX_CHANNEL)
+#define IS_RX(x)		((x) == LTQ_ETOP_RX_CHANNEL)
+
+#define ltq_etop_r32(x)		ltq_r32(ltq_etop_membase + (x))
+#define ltq_etop_w32(x, y)	ltq_w32(x, ltq_etop_membase + (y))
+#define ltq_etop_w32_mask(x, y, z)	\
+		ltq_w32_mask(x, y, ltq_etop_membase + (z))
+
+#define DRV_VERSION	"1.0"
+
+static void __iomem *ltq_etop_membase;
+
+struct ltq_etop_chan {	/* per DMA channel state */
+	int idx;	/* DMA channel number, set in ltq_etop_hw_init() */
+	int tx_free;	/* next TX descriptor to reclaim in ltq_etop_poll_tx() */
+	struct net_device *netdev;	/* owning netdev, set in ltq_etop_probe() */
+	struct napi_struct napi;	/* scheduled from ltq_etop_dma_irq() */
+	struct ltq_dma_channel dma;	/* ring state: nr, irq, desc, desc_base */
+	struct sk_buff *skb[LTQ_DESC_NUM];	/* skb attached to each ring slot */
+};
+
+struct ltq_etop_priv {	/* driver private data, reached via netdev_priv() */
+	struct net_device *netdev;
+	struct ltq_eth_data *pldata;	/* platform data; mii_mode and mac are read */
+	struct resource *res;	/* register region requested in ltq_etop_probe() */
+
+	struct mii_bus *mii_bus;	/* allocated in ltq_etop_mdio_init() */
+	struct phy_device *phydev;	/* PHY returned by phy_connect() */
+
+	struct ltq_etop_chan ch[MAX_DMA_CHAN];	/* indexed by DMA channel number */
+	int tx_free[MAX_DMA_CHAN >> 1];	/* NOTE(review): not referenced in this file */
+
+	spinlock_t lock;	/* held around descriptor ring and register updates */
+};
+
+static int
+ltq_etop_alloc_skb(struct ltq_etop_chan *ch)	/* 0, or -ENOMEM without an skb */
+{
+	ch->skb[ch->dma.desc] = dev_alloc_skb(MAX_DMA_DATA_LEN);	/* new buffer for the current slot */
+	if (!ch->skb[ch->dma.desc])
+		return -ENOMEM;
+	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL,
+		ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN,
+		DMA_FROM_DEVICE);	/* NOTE(review): this value is overwritten just below */
+	ch->dma.desc_base[ch->dma.desc].addr =
+		CPHYSADDR(ch->skb[ch->dma.desc]->data);	/* address left in the descriptor */
+	ch->dma.desc_base[ch->dma.desc].ctl =
+		LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
+		MAX_DMA_DATA_LEN;	/* presumably OWN hands the slot to the DMA engine - confirm */
+	skb_reserve(ch->skb[ch->dma.desc], NET_IP_ALIGN);	/* same offset as RX_OFFSET above */
+	return 0;
+}
+
+static void
+ltq_etop_hw_receive(struct ltq_etop_chan *ch)	/* deliver the skb of the current RX slot */
+{
+	struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	struct sk_buff *skb = ch->skb[ch->dma.desc];	/* taken before the slot is refilled */
+	int len = (desc->ctl & LTQ_DMA_SIZE_MASK) - MAX_DMA_CRC_LEN;	/* descriptor size less the CRC length */
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (ltq_etop_alloc_skb(ch)) {	/* refill the slot with a new buffer */
+		netdev_err(ch->netdev,
+			"failed to allocate new rx buffer, stopping DMA\n");
+		ltq_dma_close(&ch->dma);	/* NOTE(review): the slot is left holding a NULL skb */
+	}
+	ch->dma.desc++;	/* advance the ring cursor ... */
+	ch->dma.desc %= LTQ_DESC_NUM;	/* ... wrapping at the ring size */
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	skb_put(skb, len);
+	skb->dev = ch->netdev;
+	skb->protocol = eth_type_trans(skb, ch->netdev);
+	netif_receive_skb(skb);	/* hand the frame to the network stack */
+}
+
+/* NAPI RX poll: pass up to @budget completed descriptors to the stack. */
+static int ltq_etop_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct ltq_etop_chan *ch =
+		container_of(napi, struct ltq_etop_chan, napi);
+	int done = 0, ring_empty = 0;
+
+	while (done < budget) {
+		struct ltq_dma_desc *cur = &ch->dma.desc_base[ch->dma.desc];
+
+		if ((cur->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) != LTQ_DMA_C) {
+			ring_empty = 1;
+			break;
+		}
+		ltq_etop_hw_receive(ch);
+		done++;
+	}
+	/* nothing left (or nothing done): leave polling and ack the irq */
+	if (ring_empty || !done) {
+		napi_complete(&ch->napi);
+		ltq_dma_ack_irq(&ch->dma);
+	}
+	return done;
+}
+
+/* NAPI TX poll: free skbs of completed descriptors and restart the queue. */
+static int ltq_etop_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct ltq_etop_chan *ch =
+		container_of(napi, struct ltq_etop_chan, napi);
+	struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+	struct netdev_queue *txq =
+		netdev_get_tx_queue(ch->netdev, ch->idx >> 1);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	for (;;) {
+		struct ltq_dma_desc *done = &ch->dma.desc_base[ch->tx_free];
+
+		if ((done->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) != LTQ_DMA_C)
+			break;
+		dev_kfree_skb_any(ch->skb[ch->tx_free]);
+		ch->skb[ch->tx_free] = NULL;
+		memset(done, 0, sizeof(struct ltq_dma_desc));
+		ch->tx_free = (ch->tx_free + 1) % LTQ_DESC_NUM;
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+	if (netif_tx_queue_stopped(txq))
+		netif_tx_start_queue(txq);
+	napi_complete(&ch->napi);
+	ltq_dma_ack_irq(&ch->dma);
+	return 1;
+}
+
+/* DMA irq handler: map the irq to its channel and schedule that NAPI. */
+static irqreturn_t ltq_etop_dma_irq(int irq, void *_priv)
+{
+	struct ltq_etop_priv *etop = _priv;
+	struct ltq_etop_chan *chan = &etop->ch[irq - LTQ_DMA_CH0_INT];
+
+	napi_schedule(&chan->napi);
+	return IRQ_HANDLED;
+}
+
+/* Release a channel's descriptor ring and irq and, for RX, every ring skb. */
+static void ltq_etop_free_channel(struct net_device *dev,
+				  struct ltq_etop_chan *ch)
+{
+	int desc;
+
+	ltq_dma_free(&ch->dma);
+	if (ch->dma.irq)
+		free_irq(ch->dma.irq, netdev_priv(dev));
+	if (!IS_RX(ch->idx))
+		return;
+	for (desc = 0; desc < LTQ_DESC_NUM; desc++)	/* each slot exactly once */
+		dev_kfree_skb_any(ch->skb[desc]);
+}
+
+/* Disable PMU_PPE and release the TX and RX DMA channels. */
+static void ltq_etop_hw_exit(struct net_device *dev)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+	int nr;
+
+	ltq_pmu_disable(PMU_PPE);
+	for (nr = 0; nr < MAX_DMA_CHAN; nr++)
+		if (IS_TX(nr) || IS_RX(nr))
+			ltq_etop_free_channel(dev, &etop->ch[nr]);
+}
+
+static int
+ltq_etop_hw_init(struct net_device *dev)	/* 0, -ENOTSUPP or -ENOMEM */
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+
+	ltq_pmu_enable(PMU_PPE);
+
+	switch (priv->pldata->mii_mode) {	/* select the MII flavour in LTQ_ETOP_CFG */
+	case PHY_INTERFACE_MODE_RMII:
+		ltq_etop_w32_mask(ETOP_MII_MASK,
+			ETOP_MII_REVERSE, LTQ_ETOP_CFG);
+		break;
+
+	case PHY_INTERFACE_MODE_MII:
+		ltq_etop_w32_mask(ETOP_MII_MASK,
+			ETOP_MII_NORMAL, LTQ_ETOP_CFG);
+		break;
+
+	default:
+		netdev_err(dev, "unknown mii mode %d\n",
+			priv->pldata->mii_mode);
+		return -ENOTSUPP;	/* NOTE(review): PMU_PPE stays enabled until the caller unwinds */
+	}
+
+	/* enable crc generation */
+	ltq_etop_w32(PPE32_CGEN, LQ_PPE32_ENET_MAC_CFG);
+
+	ltq_dma_init_port(DMA_PORT_ETOP);
+
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		int irq = LTQ_DMA_CH0_INT + i;
+		struct ltq_etop_chan *ch = &priv->ch[i];
+
+		ch->idx = ch->dma.nr = i;
+
+		if (IS_TX(i)) {
+			ltq_dma_alloc_tx(&ch->dma);
+			request_irq(irq, ltq_etop_dma_irq, IRQF_DISABLED,
+				"etop_tx", priv);	/* NOTE(review): return value is not checked */
+		} else if (IS_RX(i)) {
+			ltq_dma_alloc_rx(&ch->dma);
+			for (ch->dma.desc = 0; ch->dma.desc < LTQ_DESC_NUM;
+					ch->dma.desc++)	/* the ring cursor doubles as fill index */
+				if (ltq_etop_alloc_skb(ch))
+					return -ENOMEM;	/* callers in this file unwind via ltq_etop_hw_exit() */
+			ch->dma.desc = 0;	/* rewind the cursor after filling the ring */
+			request_irq(irq, ltq_etop_dma_irq, IRQF_DISABLED,
+				"etop_rx", priv);	/* NOTE(review): return value is not checked */
+		}
+		ch->dma.irq = irq;	/* recorded for every channel, used or not */
+	}
+	return 0;
+}
+
+static void
+ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{	/* bounded copies into the fixed-size ethtool_drvinfo fields */
+	strlcpy(info->driver, "Lantiq ETOP", sizeof(info->driver));
+	strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
+	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+}
+
+static int
+ltq_etop_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+
+	return phy_ethtool_gset(etop->phydev, cmd);	/* answered by phylib */
+}
+
+static int
+ltq_etop_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+
+	return phy_ethtool_sset(etop->phydev, cmd);	/* applied by phylib */
+}
+
+/* ethtool nway_reset: restart autonegotiation on the attached PHY. */
+static int ltq_etop_nway_reset(struct net_device *dev)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+
+	return phy_start_aneg(etop->phydev);
+}
+
+static const struct ethtool_ops ltq_etop_ethtool_ops = {
+	.get_drvinfo = ltq_etop_get_drvinfo,
+	.get_settings = ltq_etop_get_settings,
+	.set_settings = ltq_etop_set_settings,
+	.nway_reset = ltq_etop_nway_reset,
+};	/* assigned to dev->ethtool_ops in ltq_etop_probe() */
+
+static int
+ltq_etop_mdio_wr(struct mii_bus *bus, int phy_addr, int phy_reg, u16 phy_data)
+{
+	u32 val = MDIO_REQUEST |	/* request word: address, register, data */
+		((phy_addr & MDIO_ADDR_MASK) << MDIO_ADDR_OFFSET) |
+		((phy_reg & MDIO_REG_MASK) << MDIO_REG_OFFSET) |
+		phy_data;
+
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;	/* NOTE(review): unbounded busy-wait, no timeout */
+	ltq_etop_w32(val, LTQ_ETOP_MDIO);	/* issue the write; completion is not awaited */
+	return 0;
+}
+
+static int
+ltq_etop_mdio_rd(struct mii_bus *bus, int phy_addr, int phy_reg)
+{
+	u32 val = MDIO_REQUEST | MDIO_READ |	/* request word: address, register */
+		((phy_addr & MDIO_ADDR_MASK) << MDIO_ADDR_OFFSET) |
+		((phy_reg & MDIO_REG_MASK) << MDIO_REG_OFFSET);
+
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;	/* NOTE(review): unbounded busy-wait, no timeout */
+	ltq_etop_w32(val, LTQ_ETOP_MDIO);
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;	/* wait until MDIO_REQUEST clears again before reading */
+	val = ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_VAL_MASK;	/* low 16 bits carry the value */
+	return val;
+}
+
+/* Link-change callback handed to phy_connect(); no action is taken. */
+static void ltq_etop_mdio_link(struct net_device *dev)
+{
+	/* nothing to do */
+}
+
+/* Connect to the first PHY on the MDIO bus and restrict it to 10/100. */
+static int ltq_etop_mdio_probe(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = NULL;
+	int addr;
+
+	/* take the first address that has an entry in phy_map */
+	for (addr = 0; addr < PHY_MAX_ADDR && !phydev; addr++)
+		phydev = priv->mii_bus->phy_map[addr];
+
+	if (!phydev) {
+		netdev_err(dev, "no PHY found\n");
+		return -ENODEV;
+	}
+
+	phydev = phy_connect(dev, dev_name(&phydev->dev), &ltq_etop_mdio_link,
+			0, priv->pldata->mii_mode);
+
+	if (IS_ERR(phydev)) {
+		netdev_err(dev, "Could not attach to PHY\n");
+		return PTR_ERR(phydev);
+	}
+
+	/*
+	 * Mask the PHY's abilities down to the modes listed here and
+	 * advertise exactly that set.
+	 */
+	phydev->supported &= SUPPORTED_10baseT_Half |
+			     SUPPORTED_10baseT_Full |
+			     SUPPORTED_100baseT_Half |
+			     SUPPORTED_100baseT_Full |
+			     SUPPORTED_Autoneg |
+			     SUPPORTED_MII |
+			     SUPPORTED_TP;
+	phydev->advertising = phydev->supported;
+
+	priv->phydev = phydev;
+	pr_info("%s: attached PHY [%s] (phy_addr=%s, irq=%d)\n",
+	       dev->name, phydev->drv->name,
+	       dev_name(&phydev->dev), phydev->irq);
+	return 0;
+}
+
+static int
+ltq_etop_mdio_init(struct net_device *dev)	/* 0, -ENOMEM or -ENXIO */
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+	int err;
+
+	priv->mii_bus = mdiobus_alloc();
+	if (!priv->mii_bus) {
+		netdev_err(dev, "failed to allocate mii bus\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	priv->mii_bus->priv = dev;
+	priv->mii_bus->read = ltq_etop_mdio_rd;
+	priv->mii_bus->write = ltq_etop_mdio_wr;
+	priv->mii_bus->name = "ltq_mii";
+	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);	/* bus id is always "0" */
+	priv->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
+	if (!priv->mii_bus->irq) {
+		err = -ENOMEM;
+		goto err_out_free_mdiobus;
+	}
+
+	for (i = 0; i < PHY_MAX_ADDR; ++i)
+		priv->mii_bus->irq[i] = PHY_POLL;	/* every address is polled, no PHY irq */
+
+	if (mdiobus_register(priv->mii_bus)) {
+		err = -ENXIO;	/* the callee's error code is replaced */
+		goto err_out_free_mdio_irq;
+	}
+
+	if (ltq_etop_mdio_probe(dev)) {
+		err = -ENXIO;	/* the callee's error code is replaced */
+		goto err_out_unregister_bus;
+	}
+	return 0;
+
+err_out_unregister_bus:	/* unwind in reverse order of setup */
+	mdiobus_unregister(priv->mii_bus);
+err_out_free_mdio_irq:
+	kfree(priv->mii_bus->irq);
+err_out_free_mdiobus:
+	mdiobus_free(priv->mii_bus);
+err_out:
+	return err;
+}
+
+/* Undo ltq_etop_mdio_init(): detach the PHY, then tear down the MDIO bus. */
+static void ltq_etop_mdio_cleanup(struct net_device *dev)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+
+	phy_disconnect(etop->phydev);
+	mdiobus_unregister(etop->mii_bus);
+	kfree(etop->mii_bus->irq);
+	mdiobus_free(etop->mii_bus);
+}
+
+/* ndo_open: open DMA and enable NAPI per channel, then start PHY and queues. */
+static int ltq_etop_open(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int nr;
+
+	for (nr = 0; nr < MAX_DMA_CHAN; nr++) {
+		struct ltq_etop_chan *ch = &priv->ch[nr];
+
+		if (IS_TX(nr) || IS_RX(nr)) {
+			ltq_dma_open(&ch->dma);
+			napi_enable(&ch->napi);
+		}
+	}
+	phy_start(priv->phydev);
+	netif_tx_start_all_queues(dev);
+	return 0;
+}
+
+/* ndo_stop: halt TX queues and PHY, then NAPI and DMA on both channels. */
+static int ltq_etop_stop(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int nr;
+
+	netif_tx_stop_all_queues(dev);
+	phy_stop(priv->phydev);
+	for (nr = 0; nr < MAX_DMA_CHAN; nr++) {
+		struct ltq_etop_chan *ch = &priv->ch[nr];
+
+		if (IS_RX(nr) || IS_TX(nr)) {
+			napi_disable(&ch->napi);
+			ltq_dma_close(&ch->dma);
+		}
+	}
+	return 0;
+}
+
+/* ndo_start_xmit: queue one skb on the TX DMA ring (BUSY if it is full). */
+static int ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	int queue = skb_get_queue_mapping(skb);
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue);
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct ltq_etop_chan *ch = &priv->ch[(queue << 1) | 1];
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	int len;
+	unsigned long flags;
+	u32 byte_offset;
+
+	if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) {
+		/* BUSY: the stack requeues this skb, so do not free it */
+		netdev_err(dev, "tx ring full\n");
+		netif_tx_stop_queue(txq);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* zero-pad short frames; skb_padto() frees the skb if it fails */
+	if (skb_padto(skb, ETH_ZLEN))
+		return NETDEV_TX_OK;
+	len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len;
+
+	/* dma needs to start on a 16 byte aligned address */
+	byte_offset = CPHYSADDR(skb->data) % 16;
+	ch->skb[ch->dma.desc] = skb;
+	dev->trans_start = jiffies;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len,
+						DMA_TO_DEVICE)) - byte_offset;
+	wmb();
+	desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
+		LTQ_DMA_TX_OFFSET(byte_offset) | (len & LTQ_DMA_SIZE_MASK);
+	ch->dma.desc = (ch->dma.desc + 1) % LTQ_DESC_NUM;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* next slot still owned by the DMA engine: stop until poll_tx frees it */
+	if (ch->dma.desc_base[ch->dma.desc].ctl & LTQ_DMA_OWN)
+		netif_tx_stop_queue(txq);
+	return NETDEV_TX_OK;
+}
+
+/* ndo_change_mtu: apply via eth_change_mtu(), then update LTQ_ETOP_IGPLEN. */
+static int ltq_etop_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	unsigned long flags;
+	int ret;
+
+	ret = eth_change_mtu(dev, new_mtu);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu, LTQ_ETOP_IGPLEN);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	return 0;
+}
+
+/* ndo_do_ioctl: forward the request to the attached PHY through phylib. */
+static int ltq_etop_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct ltq_etop_priv *etop = netdev_priv(dev);
+
+	/* TODO: mii-tool reports "No MII transceiver present!." ?!*/
+	return phy_mii_ioctl(etop->phydev, rq, cmd);
+}
+
+/* ndo_set_mac_address: set dev_addr, then load it into MAC_DA0/MAC_DA1. */
+static int ltq_etop_set_mac_address(struct net_device *dev, void *p)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	unsigned long flags;
+	int ret;
+
+	ret = eth_mac_addr(dev, p);
+	if (ret)
+		return ret;
+
+	/* store the mac for the unicast filter */
+	spin_lock_irqsave(&priv->lock, flags);
+	ltq_etop_w32(*((u32 *)dev->dev_addr), LTQ_ETOP_MAC_DA0);
+	ltq_etop_w32(*((u16 *)&dev->dev_addr[4]) << 16, LTQ_ETOP_MAC_DA1);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	return 0;
+}
+
+/* ndo_set_multicast_list: update ETOP_FTCU from the PROMISC/ALLMULTI flags. */
+static void ltq_etop_set_multicast_list(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	unsigned long flags;
+
+	/* ensure that the unicast filter is not enabled in promiscuous mode */
+	spin_lock_irqsave(&priv->lock, flags);
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI))
+		ltq_etop_w32_mask(ETOP_FTCU, 0, LTQ_ETOP_ENETS0);
+	else
+		ltq_etop_w32_mask(0, ETOP_FTCU, LTQ_ETOP_ENETS0);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/* ndo_select_queue: every skb is mapped to TX queue 0. */
+static u16 ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	/* we are currently only using the first queue */
+	return 0;
+}
+
+/*
+ * ndo_init: bring up hardware, MTU, MAC and PHY. Runs inside
+ * register_netdev(), whose caller frees the netdev on failure.
+ */
+static int ltq_etop_init(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct sockaddr mac;
+	int err;
+
+	ether_setup(dev);
+	dev->watchdog_timeo = 10 * HZ;
+	err = ltq_etop_hw_init(dev);
+	if (err)
+		goto err_hw;
+	ltq_etop_change_mtu(dev, 1500);
+
+	memcpy(&mac, &priv->pldata->mac, sizeof(struct sockaddr));
+	if (!is_valid_ether_addr(mac.sa_data)) {
+		pr_warn("etop: invalid MAC, using random\n");
+		random_ether_addr(mac.sa_data);
+	}
+
+	err = ltq_etop_set_mac_address(dev, &mac);
+	if (err)
+		goto err_hw;
+	ltq_etop_set_multicast_list(dev);
+	err = ltq_etop_mdio_init(dev);
+	if (err)
+		goto err_hw;
+	return 0;
+
+err_hw:
+	ltq_etop_hw_exit(dev);
+	return err;
+}
+
+/*
+ * ndo_tx_timeout: recover from a stalled transmitter by tearing the
+ * hardware down and bringing it up again, then waking the queue.
+ */
+static void ltq_etop_tx_timeout(struct net_device *dev)
+{
+	ltq_etop_hw_exit(dev);
+	if (ltq_etop_hw_init(dev)) {
+		/* re-init failed: release what was set up and report it */
+		ltq_etop_hw_exit(dev);
+		netdev_err(dev, "failed to restart etop after TX timeout\n");
+		return;
+	}
+
+	dev->trans_start = jiffies;
+	netif_wake_queue(dev);
+}
+
+static const struct net_device_ops ltq_eth_netdev_ops = {
+	.ndo_open = ltq_etop_open,
+	.ndo_stop = ltq_etop_stop,
+	.ndo_start_xmit = ltq_etop_tx,
+	.ndo_change_mtu = ltq_etop_change_mtu,
+	.ndo_do_ioctl = ltq_etop_ioctl,
+	.ndo_set_mac_address = ltq_etop_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,	/* generic helper, not driver code */
+	.ndo_set_multicast_list = ltq_etop_set_multicast_list,
+	.ndo_select_queue = ltq_etop_select_queue,
+	.ndo_init = ltq_etop_init,	/* hardware, MAC and MDIO setup happen here */
+	.ndo_tx_timeout = ltq_etop_tx_timeout,
+};	/* assigned to dev->netdev_ops in ltq_etop_probe() */
+
+/* Platform probe: map registers, allocate and register the netdev. */
+static int __init ltq_etop_probe(struct platform_device *pdev)
+{
+	struct net_device *dev;
+	struct ltq_etop_priv *priv;
+	struct resource *res;
+	int err, i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get etop resource\n");
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request etop resource\n");
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	ltq_etop_membase = devm_ioremap_nocache(&pdev->dev,
+		res->start, resource_size(res));
+	if (!ltq_etop_membase) {
+		dev_err(&pdev->dev, "failed to remap etop engine %d\n",
+			pdev->id);
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	dev = alloc_etherdev_mq(sizeof(struct ltq_etop_priv), 4);
+	if (!dev)
+		return -ENOMEM;	/* nothing to undo: earlier resources are devm-managed */
+	dev->netdev_ops = &ltq_eth_netdev_ops;
+	dev->ethtool_ops = &ltq_etop_ethtool_ops;
+	priv = netdev_priv(dev);
+	priv->res = res;
+	priv->pldata = dev_get_platdata(&pdev->dev);
+	priv->netdev = dev;
+	spin_lock_init(&priv->lock);
+
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		if (IS_TX(i))
+			netif_napi_add(dev, &priv->ch[i].napi,
+				ltq_etop_poll_tx, 8);
+		else if (IS_RX(i))
+			netif_napi_add(dev, &priv->ch[i].napi,
+				ltq_etop_poll_rx, 32);
+		priv->ch[i].netdev = dev;
+	}
+
+	err = register_netdev(dev);
+	if (err)
+		goto err_free;
+
+	platform_set_drvdata(pdev, dev);
+	return 0;
+
+err_free:
+	free_netdev(dev);	/* a netdev must go through free_netdev(), not kfree() */
+err_out:
+	return err;
+}
+
+static int __devexit ltq_etop_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+
+	if (dev) {
+		netif_tx_stop_all_queues(dev);
+		ltq_etop_hw_exit(dev);
+		ltq_etop_mdio_cleanup(dev);
+		unregister_netdev(dev);
+		free_netdev(dev);	/* pairs with alloc_etherdev_mq() in probe */
+	}
+	return 0;
+}
+
+static struct platform_driver ltq_mii_driver = {
+	.remove = __devexit_p(ltq_etop_remove),
+	.driver = {
+		.name = "ltq_etop",
+		.owner = THIS_MODULE,
+	},
+};	/* no .probe here: it is passed to platform_driver_probe() in init_ltq_etop() */
+
+/* Module init: register the platform driver via platform_driver_probe(). */
+int __init init_ltq_etop(void)
+{
+	int ret = platform_driver_probe(&ltq_mii_driver, ltq_etop_probe);
+
+	if (ret)
+		pr_err("ltq_etop: Error registering platform driver!\n");
+	return ret;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit exit_ltq_etop(void)
+{
+	platform_driver_unregister(&ltq_mii_driver);
+}
+
+module_init(init_ltq_etop);
+module_exit(exit_ltq_etop);
+
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_DESCRIPTION("Lantiq SoC ETOP");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index bbcf80afaf16..d72a70615c0f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -590,21 +590,13 @@ static int macvlan_port_create(struct net_device *dev)
 	return err;
 }
 
-static void macvlan_port_rcu_free(struct rcu_head *head)
-{
-	struct macvlan_port *port;
-
-	port = container_of(head, struct macvlan_port, rcu);
-	kfree(port);
-}
-
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = macvlan_port_get(dev);
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
-	call_rcu(&port->rcu, macvlan_port_rcu_free);
+	kfree_rcu(port, rcu);
 }
 
 static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index d381a0f9ee18..30aad54b1b3a 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -3238,15 +3238,18 @@ static void happy_meal_pci_exit(void)
 #endif
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id hme_sbus_match[];
 static int __devinit hme_sbus_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct device_node *dp = op->dev.of_node;
 	const char *model = of_get_property(dp, "model", NULL);
 	int is_qfe;
 
-	if (!op->dev.of_match)
+	match = of_match_device(hme_sbus_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	is_qfe = (op->dev.of_match->data != NULL);
+	is_qfe = (match->data != NULL);
 
 	if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe"))
 		is_qfe = 1;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 75b0d3cb7676..9f689f1da0fc 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
  * Returns a pointer to the interrupt parent node, or NULL if the interrupt
  * parent could not be determined.
  */
-static struct device_node *of_irq_find_parent(struct device_node *child)
+struct device_node *of_irq_find_parent(struct device_node *child)
 {
 	struct device_node *p;
 	const __be32 *parp;
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index a3755ffc03d4..bc8ce48f0778 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2550,7 +2550,6 @@ static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 					 const struct parport_pc_via_data *via)
 {
 	short inta_addr[6] = { 0x2A0, 0x2C0, 0x220, 0x240, 0x1E0 };
-	struct resource *base_res;
 	u32 ite8872set;
 	u32 ite8872_lpt, ite8872_lpthi;
 	u8 ite8872_irq, type;
@@ -2561,8 +2560,7 @@ static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 
 	/* make sure which one chip */
 	for (i = 0; i < 5; i++) {
-		base_res = request_region(inta_addr[i], 32, "it887x");
-		if (base_res) {
+		if (request_region(inta_addr[i], 32, "it887x")) {
 			int test;
 			pci_write_config_dword(pdev, 0x60,
 						0xe5000000 | inta_addr[i]);
@@ -2571,7 +2569,7 @@ static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 			test = inb(inta_addr[i]);
 			if (test != 0xff)
 				break;
-			release_region(inta_addr[i], 0x8);
+			release_region(inta_addr[i], 32);
 		}
 	}
 	if (i >= 5) {
@@ -2635,7 +2633,7 @@ static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 	/*
 	 * Release the resource so that parport_pc_probe_port can get it.
 	 */
-	release_resource(base_res);
+	release_region(inta_addr[i], 32);
 	if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
 				   irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
 		printk(KERN_INFO
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index c8ff646c0b05..0fa466a91bf4 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -88,4 +88,6 @@ config PCI_IOAPIC
 	depends on HOTPLUG
 	default y
 
-select NLS if (DMI || ACPI)
+config PCI_LABEL
+	def_bool y if (DMI || ACPI)
+	select NLS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 98d61c8e984b..c85f744270a5 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -56,10 +56,10 @@ obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
 # ACPI Related PCI FW Functions
 # ACPI _DSM provided firmware instance and string name
 #
-obj-$(CONFIG_ACPI)    += pci-acpi.o pci-label.o
+obj-$(CONFIG_ACPI)    += pci-acpi.o
 
 # SMBIOS provided firmware instance and labels
-obj-$(CONFIG_DMI)    += pci-label.o
+obj-$(CONFIG_PCI_LABEL) += pci-label.o
 
 # Cardbus & CompactPCI use setup-bus
 obj-$(CONFIG_HOTPLUG) += setup-bus.o
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 505c1c7075f0..6af6b628175b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
+#include <linux/pci-ats.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
@@ -1299,7 +1300,7 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 static struct iova_domain reserved_iova_list;
 static struct lock_class_key reserved_rbtree_key;
 
-static void dmar_init_reserved_ranges(void)
+static int dmar_init_reserved_ranges(void)
 {
 	struct pci_dev *pdev = NULL;
 	struct iova *iova;
@@ -1313,8 +1314,10 @@ static void dmar_init_reserved_ranges(void)
 	/* IOAPIC ranges shouldn't be accessed by DMA */
 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
 		IOVA_PFN(IOAPIC_RANGE_END));
-	if (!iova)
+	if (!iova) {
 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
+		return -ENODEV;
+	}
 
 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
 	for_each_pci_dev(pdev) {
@@ -1327,11 +1330,13 @@ static void dmar_init_reserved_ranges(void)
 			iova = reserve_iova(&reserved_iova_list,
 					    IOVA_PFN(r->start),
 					    IOVA_PFN(r->end));
-			if (!iova)
+			if (!iova) {
 				printk(KERN_ERR "Reserve iova failed\n");
+				return -ENODEV;
+			}
 		}
 	}
-
+	return 0;
 }
 
 static void domain_reserve_special_ranges(struct dmar_domain *domain)
@@ -1835,7 +1840,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 
 	ret = iommu_attach_domain(domain, iommu);
 	if (ret) {
-		domain_exit(domain);
+		free_domain_mem(domain);
 		goto error;
 	}
 
@@ -2213,7 +2218,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
 	return 0;
 }
 
-int __init init_dmars(void)
+static int __init init_dmars(int force_on)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
@@ -2393,8 +2398,15 @@ int __init init_dmars(void)
 	 *   enable translation
 	 */
 	for_each_drhd_unit(drhd) {
-		if (drhd->ignored)
+		if (drhd->ignored) {
+			/*
+			 * we always have to disable PMRs or DMA may fail on
+			 * this device
+			 */
+			if (force_on)
+				iommu_disable_protect_mem_regions(drhd->iommu);
 			continue;
+		}
 		iommu = drhd->iommu;
 
 		iommu_flush_write_buffer(iommu);
@@ -3240,9 +3252,15 @@ static int device_notifier(struct notifier_block *nb,
 	if (!domain)
 		return 0;
 
-	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
+	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
 		domain_remove_one_dev_info(domain, pdev);
 
+		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
+		    list_empty(&domain->devices))
+			domain_exit(domain);
+	}
+
 	return 0;
 }
 
@@ -3277,12 +3295,21 @@ int __init intel_iommu_init(void)
 	if (no_iommu || dmar_disabled)
 		return -ENODEV;
 
-	iommu_init_mempool();
-	dmar_init_reserved_ranges();
+	if (iommu_init_mempool()) {
+		if (force_on)
+			panic("tboot: Failed to initialize iommu memory\n");
+		return 	-ENODEV;
+	}
+
+	if (dmar_init_reserved_ranges()) {
+		if (force_on)
+			panic("tboot: Failed to reserve iommu ranges\n");
+		return 	-ENODEV;
+	}
 
 	init_no_remapping_devices();
 
-	ret = init_dmars();
+	ret = init_dmars(force_on);
 	if (ret) {
 		if (force_on)
 			panic("tboot: Failed to initialize DMARs\n");
@@ -3391,6 +3418,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 		domain->iommu_count--;
 		domain_update_iommu_cap(domain);
 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
+
+		spin_lock_irqsave(&iommu->lock, tmp_flags);
+		clear_bit(domain->id, iommu->domain_ids);
+		iommu->domains[domain->id] = NULL;
+		spin_unlock_irqrestore(&iommu->lock, tmp_flags);
 	}
 
 	spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3607,9 +3639,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 
 		pte = dmar_domain->pgd;
 		if (dma_pte_present(pte)) {
-			free_pgtable_page(dmar_domain->pgd);
 			dmar_domain->pgd = (struct dma_pte *)
 				phys_to_virt(dma_pte_addr(pte));
+			free_pgtable_page(pte);
 		}
 		dmar_domain->agaw--;
 	}
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 553d8ee55c1c..42fae4776515 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -13,6 +13,7 @@
 #include <linux/mutex.h>
 #include <linux/string.h>
 #include <linux/delay.h>
+#include <linux/pci-ats.h>
 #include "pci.h"
 
 #define VIRTFN_ID_LEN	16
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index d86ea8b01137..135df164a4c1 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -781,7 +781,7 @@ static int pci_pm_resume(struct device *dev)
 
 #endif /* !CONFIG_SUSPEND */
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 
 static int pci_pm_freeze(struct device *dev)
 {
@@ -970,7 +970,7 @@ static int pci_pm_restore(struct device *dev)
 	return error;
 }
 
-#else /* !CONFIG_HIBERNATION */
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #define pci_pm_freeze		NULL
 #define pci_pm_freeze_noirq	NULL
@@ -981,7 +981,7 @@ static int pci_pm_restore(struct device *dev)
 #define pci_pm_restore		NULL
 #define pci_pm_restore_noirq	NULL
 
-#endif /* !CONFIG_HIBERNATION */
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #ifdef CONFIG_PM_RUNTIME
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index a6ec200fe5ee..4020025f854e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -250,15 +250,6 @@ struct pci_sriov {
 	u8 __iomem *mstate;	/* VF Migration State Array */
 };
 
-/* Address Translation Service */
-struct pci_ats {
-	int pos;	/* capability position */
-	int stu;	/* Smallest Translation Unit */
-	int qdep;	/* Invalidate Queue Depth */
-	int ref_cnt;	/* Physical Function reference count */
-	unsigned int is_enabled:1;	/* Enable bit is set */
-};
-
 #ifdef CONFIG_PCI_IOV
 extern int pci_iov_init(struct pci_dev *dev);
 extern void pci_iov_release(struct pci_dev *dev);
@@ -269,19 +260,6 @@ extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev,
 extern void pci_restore_iov_state(struct pci_dev *dev);
 extern int pci_iov_bus_range(struct pci_bus *bus);
 
-extern int pci_enable_ats(struct pci_dev *dev, int ps);
-extern void pci_disable_ats(struct pci_dev *dev);
-extern int pci_ats_queue_depth(struct pci_dev *dev);
-/**
- * pci_ats_enabled - query the ATS status
- * @dev: the PCI device
- *
- * Returns 1 if ATS capability is enabled, or 0 if not.
- */
-static inline int pci_ats_enabled(struct pci_dev *dev)
-{
-	return dev->ats && dev->ats->is_enabled;
-}
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -304,21 +282,6 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
 	return 0;
 }
 
-static inline int pci_enable_ats(struct pci_dev *dev, int ps)
-{
-	return -ENODEV;
-}
-static inline void pci_disable_ats(struct pci_dev *dev)
-{
-}
-static inline int pci_ats_queue_depth(struct pci_dev *dev)
-{
-	return -ENODEV;
-}
-static inline int pci_ats_enabled(struct pci_dev *dev)
-{
-	return 0;
-}
 #endif /* CONFIG_PCI_IOV */
 
 static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ebf51ad1b714..a806cb321d2e 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -579,7 +579,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
 	}
 	size0 = calculate_iosize(size, min_size, size1,
 			resource_size(b_res), 4096);
-	size1 = !add_size? size0:
+	size1 = (!add_head || (add_head && !add_size)) ? size0 :
 		calculate_iosize(size, min_size+add_size, size1,
 			resource_size(b_res), 4096);
 	if (!size0 && !size1) {
@@ -677,7 +677,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 		align += aligns[order];
 	}
 	size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align);
-	size1 = !add_size ? size :
+	size1 = (!add_head || (add_head && !add_size)) ? size0 :
 		calculate_memsize(size, min_size+add_size, 0,
 				resource_size(b_res), min_align);
 	if (!size0 && !size1) {
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index fe77e8223841..e8c19def1b0f 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -173,7 +173,7 @@ static int pcmcia_access_config(struct pcmcia_device *p_dev,
 	c = p_dev->function_config;
 
 	if (!(c->state & CONFIG_LOCKED)) {
-		dev_dbg(&p_dev->dev, "Configuration isn't't locked\n");
+		dev_dbg(&p_dev->dev, "Configuration isn't locked\n");
 		mutex_unlock(&s->ops_mutex);
 		return -EACCES;
 	}
diff --git a/drivers/pcmcia/pxa2xx_balloon3.c b/drivers/pcmcia/pxa2xx_balloon3.c
index 453c54c97612..4c3e94c0ae85 100644
--- a/drivers/pcmcia/pxa2xx_balloon3.c
+++ b/drivers/pcmcia/pxa2xx_balloon3.c
@@ -25,6 +25,8 @@
 
 #include <mach/balloon3.h>
 
+#include <asm/mach-types.h>
+
 #include "soc_common.h"
 
 /*
@@ -127,6 +129,9 @@ static int __init balloon3_pcmcia_init(void)
 {
 	int ret;
 
+	if (!machine_is_balloon3())
+		return -ENODEV;
+
 	balloon3_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
 	if (!balloon3_pcmcia_device)
 		return -ENOMEM;
diff --git a/drivers/pcmcia/pxa2xx_trizeps4.c b/drivers/pcmcia/pxa2xx_trizeps4.c
index b7e596620db1..b829e655457b 100644
--- a/drivers/pcmcia/pxa2xx_trizeps4.c
+++ b/drivers/pcmcia/pxa2xx_trizeps4.c
@@ -69,15 +69,15 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
 		if (irqs[i].sock != skt->nr)
 			continue;
-		if (gpio_request(IRQ_TO_GPIO(irqs[i].irq), irqs[i].str) < 0) {
+		if (gpio_request(irq_to_gpio(irqs[i].irq), irqs[i].str) < 0) {
 			pr_err("%s: sock %d unable to request gpio %d\n",
-				__func__, skt->nr, IRQ_TO_GPIO(irqs[i].irq));
+				__func__, skt->nr, irq_to_gpio(irqs[i].irq));
 			ret = -EBUSY;
 			goto error;
 		}
-		if (gpio_direction_input(IRQ_TO_GPIO(irqs[i].irq)) < 0) {
+		if (gpio_direction_input(irq_to_gpio(irqs[i].irq)) < 0) {
 			pr_err("%s: sock %d unable to set input gpio %d\n",
-				__func__, skt->nr, IRQ_TO_GPIO(irqs[i].irq));
+				__func__, skt->nr, irq_to_gpio(irqs[i].irq));
 			ret = -EINVAL;
 			goto error;
 		}
@@ -86,7 +86,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 
 error:
 	for (; i >= 0; i--) {
-		gpio_free(IRQ_TO_GPIO(irqs[i].irq));
+		gpio_free(irq_to_gpio(irqs[i].irq));
 	}
 	return (ret);
 }
@@ -97,7 +97,7 @@ static void trizeps_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
 	/* free allocated gpio's */
 	gpio_free(GPIO_PRDY);
 	for (i = 0; i < ARRAY_SIZE(irqs); i++)
-		gpio_free(IRQ_TO_GPIO(irqs[i].irq));
+		gpio_free(irq_to_gpio(irqs[i].irq));
 }
 
 static unsigned long trizeps_pcmcia_status[2];
@@ -226,6 +226,9 @@ static int __init trizeps_pcmcia_init(void)
 {
 	int ret;
 
+	if (!machine_is_trizeps4() && !machine_is_trizeps4wl())
+		return -ENODEV;
+
 	trizeps_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
 	if (!trizeps_pcmcia_device)
 		return -ENOMEM;
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 2ee442c2a5db..0485e394712a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -187,7 +187,8 @@ config MSI_LAPTOP
 	depends on ACPI
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on RFKILL
-	depends on SERIO_I8042
+	depends on INPUT && SERIO_I8042
+	select INPUT_SPARSEKMAP
 	---help---
 	  This is a driver for laptops built by MSI (MICRO-STAR
 	  INTERNATIONAL):
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 5ea6c3477d17..ac4e7f83ce6c 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -89,7 +89,7 @@ MODULE_LICENSE("GPL");
 #define ACERWMID_EVENT_GUID "676AA15E-6A47-4D9F-A2CC-1E6D18D14026"
 
 MODULE_ALIAS("wmi:67C3371D-95A3-4C37-BB61-DD47B491DAAB");
-MODULE_ALIAS("wmi:6AF4F258-B401-42Fd-BE91-3D4AC2D7C0D3");
+MODULE_ALIAS("wmi:6AF4F258-B401-42FD-BE91-3D4AC2D7C0D3");
 MODULE_ALIAS("wmi:676AA15E-6A47-4D9F-A2CC-1E6D18D14026");
 
 enum acer_wmi_event_ids {
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index efc776cb0c66..832a3fd7c1c8 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -201,8 +201,8 @@ static int asus_wmi_input_init(struct asus_wmi *asus)
 	if (!asus->inputdev)
 		return -ENOMEM;
 
-	asus->inputdev->name = asus->driver->input_phys;
-	asus->inputdev->phys = asus->driver->input_name;
+	asus->inputdev->name = asus->driver->input_name;
+	asus->inputdev->phys = asus->driver->input_phys;
 	asus->inputdev->id.bustype = BUS_HOST;
 	asus->inputdev->dev.parent = &asus->platform_device->dev;
 
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 5f2dd386152b..2c1abf63957f 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -585,8 +585,9 @@ static bool eeepc_wlan_rfkill_blocked(struct eeepc_laptop *eeepc)
 	return true;
 }
 
-static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc)
+static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle)
 {
+	struct pci_dev *port;
 	struct pci_dev *dev;
 	struct pci_bus *bus;
 	bool blocked = eeepc_wlan_rfkill_blocked(eeepc);
@@ -599,9 +600,16 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc)
 	mutex_lock(&eeepc->hotplug_lock);
 
 	if (eeepc->hotplug_slot) {
-		bus = pci_find_bus(0, 1);
+		port = acpi_get_pci_dev(handle);
+		if (!port) {
+			pr_warning("Unable to find port\n");
+			goto out_unlock;
+		}
+
+		bus = port->subordinate;
+
 		if (!bus) {
-			pr_warning("Unable to find PCI bus 1?\n");
+			pr_warning("Unable to find PCI bus?\n");
 			goto out_unlock;
 		}
 
@@ -609,6 +617,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc)
 			pr_err("Unable to read PCI config space?\n");
 			goto out_unlock;
 		}
+
 		absent = (l == 0xffffffff);
 
 		if (blocked != absent) {
@@ -647,6 +656,17 @@ out_unlock:
 	mutex_unlock(&eeepc->hotplug_lock);
 }
 
+static void eeepc_rfkill_hotplug_update(struct eeepc_laptop *eeepc, char *node)
+{
+	acpi_handle port_handle;
+
+	/* Look @node up by ACPI path; do nothing when it cannot be resolved. */
+	if (ACPI_FAILURE(acpi_get_handle(NULL, node, &port_handle)))
+		return;
+
+	eeepc_rfkill_hotplug(eeepc, port_handle);
+}
+
 static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct eeepc_laptop *eeepc = data;
@@ -654,7 +674,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 	if (event != ACPI_NOTIFY_BUS_CHECK)
 		return;
 
-	eeepc_rfkill_hotplug(eeepc);
+	eeepc_rfkill_hotplug(eeepc, handle);
 }
 
 static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc,
@@ -672,6 +692,11 @@ static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc,
 						     eeepc);
 		if (ACPI_FAILURE(status))
 			pr_warning("Failed to register notify on %s\n", node);
+		/*
+		 * Refresh pci hotplug in case the rfkill state was
+		 * changed during setup.
+		 */
+		eeepc_rfkill_hotplug(eeepc, handle);
 	} else
 		return -ENODEV;
 
@@ -693,6 +718,12 @@ static void eeepc_unregister_rfkill_notifier(struct eeepc_laptop *eeepc,
 		if (ACPI_FAILURE(status))
 			pr_err("Error removing rfkill notify handler %s\n",
 				node);
+		/*
+		 * Refresh pci hotplug in case the rfkill
+		 * state was changed after the notify
+		 * handler was removed.
+		 */
+		eeepc_rfkill_hotplug(eeepc, handle);
 	}
 }
 
@@ -816,11 +847,7 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 		rfkill_destroy(eeepc->wlan_rfkill);
 		eeepc->wlan_rfkill = NULL;
 	}
-	/*
-	 * Refresh pci hotplug in case the rfkill state was changed after
-	 * eeepc_unregister_rfkill_notifier()
-	 */
-	eeepc_rfkill_hotplug(eeepc);
+
 	if (eeepc->hotplug_slot)
 		pci_hp_deregister(eeepc->hotplug_slot);
 
@@ -889,11 +916,6 @@ static int eeepc_rfkill_init(struct eeepc_laptop *eeepc)
 	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5");
 	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6");
 	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7");
-	/*
-	 * Refresh pci hotplug in case the rfkill state was changed during
-	 * setup.
-	 */
-	eeepc_rfkill_hotplug(eeepc);
 
 exit:
 	if (result && result != -ENODEV)
@@ -928,8 +950,11 @@ static int eeepc_hotk_restore(struct device *device)
 	struct eeepc_laptop *eeepc = dev_get_drvdata(device);
 
 	/* Refresh both wlan rfkill state and pci hotplug */
-	if (eeepc->wlan_rfkill)
-		eeepc_rfkill_hotplug(eeepc);
+	if (eeepc->wlan_rfkill) {
+		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P5");
+		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P6");
+		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P7");
+	}
 
 	if (eeepc->bluetooth_rfkill)
 		rfkill_set_sw_state(eeepc->bluetooth_rfkill,
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 0ddc434fb93b..649dcadd8ea3 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -67,9 +67,11 @@ static const struct key_entry eeepc_wmi_keymap[] = {
 	{ KE_KEY, 0x82, { KEY_CAMERA } },
 	{ KE_KEY, 0x83, { KEY_CAMERA_ZOOMIN } },
 	{ KE_KEY, 0x88, { KEY_WLAN } },
+	{ KE_KEY, 0xbd, { KEY_CAMERA } },
 	{ KE_KEY, 0xcc, { KEY_SWITCHVIDEOMODE } },
 	{ KE_KEY, 0xe0, { KEY_PROG1 } }, /* Task Manager */
 	{ KE_KEY, 0xe1, { KEY_F14 } }, /* Change Resolution */
+	{ KE_KEY, 0xe8, { KEY_SCREENLOCK } },
 	{ KE_KEY, 0xe9, { KEY_BRIGHTNESS_ZERO } },
 	{ KE_KEY, 0xeb, { KEY_CAMERA_ZOOMOUT } },
 	{ KE_KEY, 0xec, { KEY_CAMERA_UP } },
diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index d653104b59cb..464bb3fc4d88 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c
@@ -74,6 +74,19 @@ struct pmic_gpio {
 	u32			trigger_type;
 };
 
+/* Program the edge-detect bits of @gpio's control register from @type. */
+static void pmic_program_irqtype(int gpio, int type)
+{
+	u8 rising = (type & IRQ_TYPE_EDGE_RISING) ? 0x20 : 0x00;
+	u8 falling = (type & IRQ_TYPE_EDGE_FALLING) ? 0x10 : 0x00;
+
+	/* bit 0x20 is written as set only when a rising edge is requested */
+	intel_scu_ipc_update_register(GPIO0 + gpio, rising, 0x20);
+
+	/* bit 0x10 is written as set only when a falling edge is requested */
+	intel_scu_ipc_update_register(GPIO0 + gpio, falling, 0x10);
+}
+
 static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
 	if (offset > 8) {
@@ -166,16 +179,38 @@ static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
 	return pg->irq_base + offset;
 }
 
+static void pmic_bus_lock(struct irq_data *data)
+{
+	struct pmic_gpio *chip = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&chip->buslock);	/* released in pmic_bus_sync_unlock() */
+}
+
+static void pmic_bus_sync_unlock(struct irq_data *data)
+{
+	struct pmic_gpio *chip = irq_data_get_irq_chip_data(data);
+	unsigned int gpio = chip->update_type & ~GPIO_UPDATE_TYPE;
+
+	/* apply a flagged trigger-type update, then clear the flag */
+	if (chip->update_type) {
+		pmic_program_irqtype(gpio, chip->trigger_type);
+		chip->update_type = 0;
+	}
+	mutex_unlock(&chip->buslock);
+}
+
 /* the gpiointr register is read-clear, so just do nothing. */
 static void pmic_irq_unmask(struct irq_data *data) { }
 
 static void pmic_irq_mask(struct irq_data *data) { }
 
 static struct irq_chip pmic_irqchip = {
-	.name		= "PMIC-GPIO",
-	.irq_mask	= pmic_irq_mask,
-	.irq_unmask	= pmic_irq_unmask,
-	.irq_set_type	= pmic_irq_type,
+	.name			= "PMIC-GPIO",
+	.irq_mask		= pmic_irq_mask,
+	.irq_unmask		= pmic_irq_unmask,
+	.irq_set_type		= pmic_irq_type,
+	.irq_bus_lock		= pmic_bus_lock,
+	.irq_bus_sync_unlock	= pmic_bus_sync_unlock,
 };
 
 static irqreturn_t pmic_irq_handler(int irq, void *data)
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index de434c6dc2d6..d347116d150e 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -571,6 +571,16 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = {
 		.callback = dmi_check_cb,
 	},
 	{
+		.ident = "R410 Plus",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR,
+					"SAMSUNG ELECTRONICS CO., LTD."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "R410P"),
+			DMI_MATCH(DMI_BOARD_NAME, "R460"),
+		},
+		.callback = dmi_check_cb,
+	},
+	{
 		.ident = "R518",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR,
@@ -591,12 +601,12 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = {
 		.callback = dmi_check_cb,
 	},
 	{
-		.ident = "N150/N210/N220",
+		.ident = "N150/N210/N220/N230",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR,
 					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220"),
-			DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220/N230"),
+			DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220/N230"),
 		},
 		.callback = dmi_check_cb,
 	},
@@ -771,6 +781,7 @@ static int __init samsung_init(void)
 
 	/* create a backlight device to talk to this one */
 	memset(&props, 0, sizeof(struct backlight_properties));
+	props.type = BACKLIGHT_PLATFORM;
 	props.max_brightness = sabi_config->max_brightness;
 	backlight_device = backlight_device_register("samsung", &sdev->dev,
 						     NULL, &backlight_ops,
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index e642f5f29504..6fe8cd6e23b5 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -138,6 +138,8 @@ MODULE_PARM_DESC(kbd_backlight_timeout,
 		 "1 for 30 seconds, 2 for 60 seconds and 3 to disable timeout "
 		 "(default: 0)");
 
+static void sony_nc_kbd_backlight_resume(void);
+
 enum sony_nc_rfkill {
 	SONY_WIFI,
 	SONY_BLUETOOTH,
@@ -771,11 +773,6 @@ static int sony_nc_handles_setup(struct platform_device *pd)
 	if (!handles)
 		return -ENOMEM;
 
-	sysfs_attr_init(&handles->devattr.attr);
-	handles->devattr.attr.name = "handles";
-	handles->devattr.attr.mode = S_IRUGO;
-	handles->devattr.show = sony_nc_handles_show;
-
 	for (i = 0; i < ARRAY_SIZE(handles->cap); i++) {
 		if (!acpi_callsetfunc(sony_nc_acpi_handle,
 					"SN00", i + 0x20, &result)) {
@@ -785,11 +782,18 @@ static int sony_nc_handles_setup(struct platform_device *pd)
 		}
 	}
 
-	/* allow reading capabilities via sysfs */
-	if (device_create_file(&pd->dev, &handles->devattr)) {
-		kfree(handles);
-		handles = NULL;
-		return -1;
+	if (debug) {
+		sysfs_attr_init(&handles->devattr.attr);
+		handles->devattr.attr.name = "handles";
+		handles->devattr.attr.mode = S_IRUGO;
+		handles->devattr.show = sony_nc_handles_show;
+
+		/* allow reading capabilities via sysfs */
+		if (device_create_file(&pd->dev, &handles->devattr)) {
+			kfree(handles);
+			handles = NULL;
+			return -1;
+		}
 	}
 
 	return 0;
@@ -798,7 +802,8 @@ static int sony_nc_handles_setup(struct platform_device *pd)
 static int sony_nc_handles_cleanup(struct platform_device *pd)
 {
 	if (handles) {
-		device_remove_file(&pd->dev, &handles->devattr);
+		if (debug)
+			device_remove_file(&pd->dev, &handles->devattr);
 		kfree(handles);
 		handles = NULL;
 	}
@@ -808,6 +813,11 @@ static int sony_nc_handles_cleanup(struct platform_device *pd)
 static int sony_find_snc_handle(int handle)
 {
 	int i;
+
+	/* not initialized yet, return early */
+	if (!handles)
+		return -1;
+
 	for (i = 0; i < 0x10; i++) {
 		if (handles->cap[i] == handle) {
 			dprintk("found handle 0x%.4x (offset: 0x%.2x)\n",
@@ -924,6 +934,14 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,
 /*
  * Backlight device
  */
+struct sony_backlight_props {
+	struct backlight_device *dev;	/* registered device, NULL if none */
+	int			handle;	/* SNC handle for brightness calls */
+	u8			offset;	/* lowest level read from SN06 */
+	u8			maxlvl;	/* highest level read from SN06 */
+};
+static struct sony_backlight_props sony_bl_props;
+
 static int sony_backlight_update_status(struct backlight_device *bd)
 {
 	return acpi_callsetfunc(sony_nc_acpi_handle, "SBRT",
@@ -944,21 +962,26 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd)
 {
 	int result;
 	int *handle = (int *)bl_get_data(bd);
+	struct sony_backlight_props *sdev =
+		(struct sony_backlight_props *)bl_get_data(bd);
 
-	sony_call_snc_handle(*handle, 0x0200, &result);
+	sony_call_snc_handle(sdev->handle, 0x0200, &result);
 
-	return result & 0xff;
+	return (result & 0xff) - sdev->offset;
 }
 
 static int sony_nc_update_status_ng(struct backlight_device *bd)
 {
 	int value, result;
 	int *handle = (int *)bl_get_data(bd);
+	struct sony_backlight_props *sdev =
+		(struct sony_backlight_props *)bl_get_data(bd);
 
-	value = bd->props.brightness;
-	sony_call_snc_handle(*handle, 0x0100 | (value << 16), &result);
+	value = bd->props.brightness + sdev->offset;
+	if (sony_call_snc_handle(sdev->handle, 0x0100 | (value << 16), &result))
+		return -EIO;
 
-	return sony_nc_get_brightness_ng(bd);
+	return value;
 }
 
 static const struct backlight_ops sony_backlight_ops = {
@@ -971,8 +994,6 @@ static const struct backlight_ops sony_backlight_ng_ops = {
 	.update_status = sony_nc_update_status_ng,
 	.get_brightness = sony_nc_get_brightness_ng,
 };
-static int backlight_ng_handle;
-static struct backlight_device *sony_backlight_device;
 
 /*
  * New SNC-only Vaios event mapping to driver known keys
@@ -1168,6 +1189,9 @@ static int sony_nc_resume(struct acpi_device *device)
 	/* re-read rfkill state */
 	sony_nc_rfkill_update();
 
+	/* restore kbd backlight states */
+	sony_nc_kbd_backlight_resume();
+
 	return 0;
 }
 
@@ -1355,6 +1379,7 @@ out_no_enum:
 #define KBDBL_HANDLER	0x137
 #define KBDBL_PRESENT	0xB00
 #define	SET_MODE	0xC00
+#define SET_STATE	0xD00
 #define SET_TIMEOUT	0xE00
 
 struct kbd_backlight {
@@ -1377,6 +1402,10 @@ static ssize_t __sony_nc_kbd_backlight_mode_set(u8 value)
 				(value << 0x10) | SET_MODE, &result))
 		return -EIO;
 
+	/* Try to turn the light on/off immediately */
+	sony_call_snc_handle(KBDBL_HANDLER, (value << 0x10) | SET_STATE,
+			&result);
+
 	kbdbl_handle->mode = value;
 
 	return 0;
@@ -1458,7 +1487,7 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd)
 {
 	int result;
 
-	if (sony_call_snc_handle(0x137, KBDBL_PRESENT, &result))
+	if (sony_call_snc_handle(KBDBL_HANDLER, KBDBL_PRESENT, &result))
 		return 0;
 	if (!(result & 0x02))
 		return 0;
@@ -1501,13 +1530,105 @@ outkzalloc:
 static int sony_nc_kbd_backlight_cleanup(struct platform_device *pd)
 {
 	if (kbdbl_handle) {
+		int result;
+
 		device_remove_file(&pd->dev, &kbdbl_handle->mode_attr);
 		device_remove_file(&pd->dev, &kbdbl_handle->timeout_attr);
+
+		/* restore the default hw behaviour */
+		sony_call_snc_handle(KBDBL_HANDLER, 0x1000 | SET_MODE, &result);
+		sony_call_snc_handle(KBDBL_HANDLER, SET_TIMEOUT, &result);
+
 		kfree(kbdbl_handle);
 	}
 	return 0;
 }
 
+/* On resume re-send kbd backlight mode (only if 0) and a non-zero timeout. */
+static void sony_nc_kbd_backlight_resume(void)
+{
+	int unused = 0;
+
+	if (!kbdbl_handle)
+		return;
+
+	if (!kbdbl_handle->mode)
+		sony_call_snc_handle(KBDBL_HANDLER, SET_MODE, &unused);
+
+	if (kbdbl_handle->timeout)
+		sony_call_snc_handle(KBDBL_HANDLER, SET_TIMEOUT |
+				(kbdbl_handle->timeout << 0x10), &unused);
+}
+
+/* Fill @props for SNC @handle: defaults first, then the SN06 level range. */
+static void sony_nc_backlight_ng_read_limits(int handle,
+		struct sony_backlight_props *props)
+{
+	int offset;
+	acpi_status status;
+	u8 brlvl, i;
+	u8 min = 0xff, max = 0x00;
+	struct acpi_object_list params;
+	union acpi_object in_obj, *lvl_enum;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	props->handle = handle;
+	props->offset = 0;
+	props->maxlvl = 0xff;
+
+	offset = sony_find_snc_handle(handle);
+	if (offset < 0)
+		return;
+
+	/* try to read the boundaries from ACPI tables, if we fail the above
+	 * defaults should be reasonable
+	 */
+	params.count = 1;
+	params.pointer = &in_obj;
+	in_obj.type = ACPI_TYPE_INTEGER;
+	in_obj.integer.value = offset;
+	status = acpi_evaluate_object(sony_nc_acpi_handle, "SN06", &params,
+			&buffer);
+	if (ACPI_FAILURE(status))
+		return;
+
+	lvl_enum = (union acpi_object *) buffer.pointer;
+	if (!lvl_enum) {
+		pr_err("No SN06 return object.\n");
+		return;
+	}
+	if (lvl_enum->type != ACPI_TYPE_BUFFER) {
+		pr_err("Invalid SN06 return object 0x%.2x\n", lvl_enum->type);
+		goto out_invalid;
+	}
+
+	/* the buffer lists brightness levels available, brightness levels are
+	 * from 0 to 8 in the array, other values are used by ALS control.
+	 */
+	for (i = 0; i < 9 && i < lvl_enum->buffer.length; i++) {
+		brlvl = *(lvl_enum->buffer.pointer + i);
+		dprintk("Brightness level: %d\n", brlvl);
+
+		if (!brlvl)
+			break;
+
+		if (brlvl > max)
+			max = brlvl;
+		if (brlvl < min)
+			min = brlvl;
+	}
+	/* min > max means no level was listed: keep the defaults set above */
+	if (min <= max) {
+		props->offset = min;
+		props->maxlvl = max;
+	}
+	dprintk("Brightness levels: min=%d max=%d\n", props->offset,
+			props->maxlvl);
+
+out_invalid:
+	kfree(buffer.pointer);
+}
+
 static void sony_nc_backlight_setup(void)
 {
 	acpi_handle unused;
@@ -1516,14 +1637,14 @@ static void sony_nc_backlight_setup(void)
 	struct backlight_properties props;
 
 	if (sony_find_snc_handle(0x12f) != -1) {
-		backlight_ng_handle = 0x12f;
 		ops = &sony_backlight_ng_ops;
-		max_brightness = 0xff;
+		sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props);
+		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
 	} else if (sony_find_snc_handle(0x137) != -1) {
-		backlight_ng_handle = 0x137;
 		ops = &sony_backlight_ng_ops;
-		max_brightness = 0xff;
+		sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props);
+		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
 	} else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT",
 						&unused))) {
@@ -1536,22 +1657,22 @@ static void sony_nc_backlight_setup(void)
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_PLATFORM;
 	props.max_brightness = max_brightness;
-	sony_backlight_device = backlight_device_register("sony", NULL,
-							  &backlight_ng_handle,
-							  ops, &props);
+	sony_bl_props.dev = backlight_device_register("sony", NULL,
+						      &sony_bl_props,
+						      ops, &props);
 
-	if (IS_ERR(sony_backlight_device)) {
-		pr_warning(DRV_PFX "unable to register backlight device\n");
-		sony_backlight_device = NULL;
+	if (IS_ERR(sony_bl_props.dev)) {
+		pr_warn(DRV_PFX "unable to register backlight device\n");
+		sony_bl_props.dev = NULL;
 	} else
-		sony_backlight_device->props.brightness =
-		    ops->get_brightness(sony_backlight_device);
+		sony_bl_props.dev->props.brightness =
+			ops->get_brightness(sony_bl_props.dev);
 }
 
 static void sony_nc_backlight_cleanup(void)
 {
-	if (sony_backlight_device)
-		backlight_device_unregister(sony_backlight_device);
+	if (sony_bl_props.dev)
+		backlight_device_unregister(sony_bl_props.dev);
 }
 
 static int sony_nc_add(struct acpi_device *device)
@@ -2549,7 +2670,7 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd,
 	mutex_lock(&spic_dev.lock);
 	switch (cmd) {
 	case SONYPI_IOCGBRT:
-		if (sony_backlight_device == NULL) {
+		if (sony_bl_props.dev == NULL) {
 			ret = -EIO;
 			break;
 		}
@@ -2562,7 +2683,7 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd,
 				ret = -EFAULT;
 		break;
 	case SONYPI_IOCSBRT:
-		if (sony_backlight_device == NULL) {
+		if (sony_bl_props.dev == NULL) {
 			ret = -EIO;
 			break;
 		}
@@ -2576,8 +2697,8 @@ static long sonypi_misc_ioctl(struct file *fp, unsigned int cmd,
 			break;
 		}
 		/* sync the backlight device status */
-		sony_backlight_device->props.brightness =
-		    sony_backlight_get_brightness(sony_backlight_device);
+		sony_bl_props.dev->props.brightness =
+		    sony_backlight_get_brightness(sony_bl_props.dev);
 		break;
 	case SONYPI_IOCGBAT1CAP:
 		if (ec_read16(SONYPI_BAT1_FULL, &val16)) {
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index a08561f5349e..562fcf0dd2b5 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -128,7 +128,8 @@ enum {
 };
 
 /* ACPI HIDs */
-#define TPACPI_ACPI_HKEY_HID		"IBM0068"
+#define TPACPI_ACPI_IBM_HKEY_HID	"IBM0068"
+#define TPACPI_ACPI_LENOVO_HKEY_HID	"LEN0068"
 #define TPACPI_ACPI_EC_HID		"PNP0C09"
 
 /* Input IDs */
@@ -3879,7 +3880,8 @@ errexit:
 }
 
 static const struct acpi_device_id ibm_htk_device_ids[] = {
-	{TPACPI_ACPI_HKEY_HID, 0},
+	{TPACPI_ACPI_IBM_HKEY_HID, 0},
+	{TPACPI_ACPI_LENOVO_HKEY_HID, 0},
 	{"", 0},
 };
 
@@ -8618,8 +8620,7 @@ static bool __pure __init tpacpi_is_valid_fw_id(const char* const s,
 		tpacpi_is_fw_digit(s[1]) &&
 		s[2] == t && s[3] == 'T' &&
 		tpacpi_is_fw_digit(s[4]) &&
-		tpacpi_is_fw_digit(s[5]) &&
-		s[6] == 'W' && s[7] == 'W';
+		tpacpi_is_fw_digit(s[5]);
 }
 
 /* returns 0 - probe ok, or < 0 - probe error.
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index c29719cacbca..86c9a091a2ff 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1171,16 +1171,17 @@ static int rio_hdid_setup(char *str)
 
 __setup("riohdid=", rio_hdid_setup);
 
-void rio_register_mport(struct rio_mport *port)
+int rio_register_mport(struct rio_mport *port)
 {
 	if (next_portid >= RIO_MAX_MPORTS) {
 		pr_err("RIO: reached specified max number of mports\n");
-		return;
+		return 1;
 	}
 
 	port->id = next_portid++;
 	port->host_deviceid = rio_get_hdid(port->id);
 	list_add_tail(&port->node, &rio_mports);
+	return 0;
 }
 
 EXPORT_SYMBOL_GPL(rio_local_get_device_id);
diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c
index 095016a9dec1..043ee3136e40 100644
--- a/drivers/rapidio/switches/idt_gen2.c
+++ b/drivers/rapidio/switches/idt_gen2.c
@@ -95,6 +95,9 @@ idtg2_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
 	else
 		table++;
 
+	if (route_port == RIO_INVALID_ROUTE)
+		route_port = IDT_DEFAULT_ROUTE;
+
 	rio_mport_write_config_32(mport, destid, hopcount,
 				  LOCAL_RTE_CONF_DESTID_SEL, table);
 
@@ -411,6 +414,12 @@ static int idtg2_switch_init(struct rio_dev *rdev, int do_enum)
 	rdev->rswitch->em_handle = idtg2_em_handler;
 	rdev->rswitch->sw_sysfs = idtg2_sysfs;
 
+	if (do_enum) {
+		/* Ensure that default routing is disabled on startup */
+		rio_write_config_32(rdev,
+				    RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE);
+	}
+
 	return 0;
 }
 
@@ -418,3 +427,4 @@ DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init);
 DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init);
 DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTVPS1616, idtg2_switch_init);
 DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTSPS1616, idtg2_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1432, idtg2_switch_init);
diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c
index 3a971077e7bf..d06ee2d44b44 100644
--- a/drivers/rapidio/switches/idtcps.c
+++ b/drivers/rapidio/switches/idtcps.c
@@ -26,6 +26,9 @@ idtcps_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
 {
 	u32 result;
 
+	if (route_port == RIO_INVALID_ROUTE)
+		route_port = CPS_DEFAULT_ROUTE;
+
 	if (table == RIO_GLOBAL_TABLE) {
 		rio_mport_write_config_32(mport, destid, hopcount,
 				RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid);
@@ -130,6 +133,9 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum)
 		/* set TVAL = ~50us */
 		rio_write_config_32(rdev,
 			rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8);
+		/* Ensure that default routing is disabled on startup */
+		rio_write_config_32(rdev,
+				    RIO_STD_RTE_DEFAULT_PORT, CPS_NO_ROUTE);
 	}
 
 	return 0;
diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c
index 1a62934bfebc..db8b8028988d 100644
--- a/drivers/rapidio/switches/tsi57x.c
+++ b/drivers/rapidio/switches/tsi57x.c
@@ -303,6 +303,12 @@ static int tsi57x_switch_init(struct rio_dev *rdev, int do_enum)
 	rdev->rswitch->em_init = tsi57x_em_init;
 	rdev->rswitch->em_handle = tsi57x_em_handler;
 
+	if (do_enum) {
+		/* Ensure that default routing is disabled on startup */
+		rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT,
+				    RIO_INVALID_ROUTE);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index e1878877399c..42891726ea72 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -3,10 +3,10 @@
 #
 
 config RTC_LIB
-	tristate
+	bool
 
 menuconfig RTC_CLASS
-	tristate "Real Time Clock"
+	bool "Real Time Clock"
 	default n
 	depends on !S390
 	select RTC_LIB
@@ -15,9 +15,6 @@ menuconfig RTC_CLASS
  	  be allowed to plug one or more RTCs to your system. You will
 	  probably want to enable one or more of the interfaces below.
 
-	  This driver can also be built as a module. If so, the module
-	  will be called rtc-core.
-
 if RTC_CLASS
 
 config RTC_HCTOSYS
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 09b4437b3e61..4194e59e14cd 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -41,26 +41,21 @@ static void rtc_device_release(struct device *dev)
  * system's wall clock; restore it on resume().
  */
 
-static struct timespec	delta;
 static time_t		oldtime;
+static struct timespec	oldts;
 
 static int rtc_suspend(struct device *dev, pm_message_t mesg)
 {
 	struct rtc_device	*rtc = to_rtc_device(dev);
 	struct rtc_time		tm;
-	struct timespec		ts = current_kernel_time();
 
 	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
 
 	rtc_read_time(rtc, &tm);
+	ktime_get_ts(&oldts);
 	rtc_tm_to_time(&tm, &oldtime);
 
-	/* RTC precision is 1 second; adjust delta for avg 1/2 sec err */
-	set_normalized_timespec(&delta,
-				ts.tv_sec - oldtime,
-				ts.tv_nsec - (NSEC_PER_SEC >> 1));
-
 	return 0;
 }
 
@@ -70,10 +65,12 @@ static int rtc_resume(struct device *dev)
 	struct rtc_time		tm;
 	time_t			newtime;
 	struct timespec		time;
+	struct timespec		newts;
 
 	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
 
+	ktime_get_ts(&newts);
 	rtc_read_time(rtc, &tm);
 	if (rtc_valid_tm(&tm) != 0) {
 		pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
@@ -85,15 +82,13 @@ static int rtc_resume(struct device *dev)
 			pr_debug("%s:  time travel!\n", dev_name(&rtc->dev));
 		return 0;
 	}
+	/* calculate the RTC time delta */
+	set_normalized_timespec(&time, newtime - oldtime, 0);
 
-	/* restore wall clock using delta against this RTC;
-	 * adjust again for avg 1/2 second RTC sampling error
-	 */
-	set_normalized_timespec(&time,
-				newtime + delta.tv_sec,
-				(NSEC_PER_SEC >> 1) + delta.tv_nsec);
-	do_settimeofday(&time);
+	/* subtract kernel time elapsed between rtc_suspend and rtc_resume */
+	time = timespec_sub(time, timespec_sub(newts, oldts));
 
+	timekeeping_inject_sleeptime(&time);
 	return 0;
 }
 
@@ -171,7 +166,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	err = __rtc_read_alarm(rtc, &alrm);
 
 	if (!err && !rtc_valid_tm(&alrm.time))
-		rtc_set_alarm(rtc, &alrm);
+		rtc_initialize_alarm(rtc, &alrm);
 
 	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
 	dev_set_name(&rtc->dev, "rtc%d", id);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 23719f0acbf6..ef6316acec43 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -375,6 +375,32 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
 
+/* Called once per device from rtc_device_register */
+int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+	int err;
+
+	err = rtc_valid_tm(&alarm->time);
+	if (err != 0)
+		return err;
+
+	err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
+	rtc->aie_timer.period = ktime_set(0, 0);
+	if (alarm->enabled) {
+		rtc->aie_timer.enabled = 1;
+		timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
+	}
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_initialize_alarm);
+
+
+
 int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 {
 	int err = mutex_lock_interruptible(&rtc->ops_lock);
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index a0fc4cf42abf..90d866272c8e 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -250,6 +250,8 @@ static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 		bfin_rtc_int_set_alarm(rtc);
 	else
 		bfin_rtc_int_clear(~(RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY));
+
+	return 0;
 }
 
 static int bfin_rtc_read_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index 316f484999b5..80f9c88214c5 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -220,6 +220,7 @@ static int __init coh901331_probe(struct platform_device *pdev)
 	}
 	clk_disable(rtap->clk);
 
+	platform_set_drvdata(pdev, rtap);
 	rtap->rtc = rtc_device_register("coh901331", &pdev->dev, &coh901331_ops,
 					 THIS_MODULE);
 	if (IS_ERR(rtap->rtc)) {
@@ -227,11 +228,10 @@ static int __init coh901331_probe(struct platform_device *pdev)
 		goto out_no_rtc;
 	}
 
-	platform_set_drvdata(pdev, rtap);
-
 	return 0;
 
  out_no_rtc:
+	platform_set_drvdata(pdev, NULL);
  out_no_clk_enable:
 	clk_put(rtap->clk);
  out_no_clk:
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 8d46838dff8a..755e1fe914af 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -524,6 +524,8 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
 		goto fail2;
 	}
 
+	platform_set_drvdata(pdev, davinci_rtc);
+
 	davinci_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
 				    &davinci_rtc_ops, THIS_MODULE);
 	if (IS_ERR(davinci_rtc->rtc)) {
@@ -553,8 +555,6 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
 
 	rtcss_write(davinci_rtc, PRTCSS_RTC_CCTRL_CAEN, PRTCSS_RTC_CCTRL);
 
-	platform_set_drvdata(pdev, davinci_rtc);
-
 	device_init_wakeup(&pdev->dev, 0);
 
 	return 0;
@@ -562,6 +562,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
 fail4:
 	rtc_device_unregister(davinci_rtc->rtc);
 fail3:
+	platform_set_drvdata(pdev, NULL);
 	iounmap(davinci_rtc->base);
 fail2:
 	release_mem_region(davinci_rtc->pbase, davinci_rtc->base_size);
diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c
index 60ce69600828..47e681df31e2 100644
--- a/drivers/rtc/rtc-ds1286.c
+++ b/drivers/rtc/rtc-ds1286.c
@@ -355,6 +355,7 @@ static int __devinit ds1286_probe(struct platform_device *pdev)
 		goto out;
 	}
 	spin_lock_init(&priv->lock);
+	platform_set_drvdata(pdev, priv);
 	rtc = rtc_device_register("ds1286", &pdev->dev,
 				  &ds1286_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
@@ -362,7 +363,6 @@ static int __devinit ds1286_probe(struct platform_device *pdev)
 		goto out;
 	}
 	priv->rtc = rtc;
-	platform_set_drvdata(pdev, priv);
 	return 0;
 
 out:
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 11ae64dcbf3c..335551d333b2 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -151,6 +151,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 		return -ENXIO;
 
 	pdev->dev.platform_data = ep93xx_rtc;
+	platform_set_drvdata(pdev, rtc);
 
 	rtc = rtc_device_register(pdev->name,
 				&pdev->dev, &ep93xx_rtc_ops, THIS_MODULE);
@@ -159,8 +160,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 		goto exit;
 	}
 
-	platform_set_drvdata(pdev, rtc);
-
 	err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files);
 	if (err)
 		goto fail;
@@ -168,9 +167,9 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 	return 0;
 
 fail:
-	platform_set_drvdata(pdev, NULL);
 	rtc_device_unregister(rtc);
 exit:
+	platform_set_drvdata(pdev, NULL);
 	pdev->dev.platform_data = NULL;
 	return err;
 }
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 69fe664a2228..eda128fc1d38 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -783,6 +783,9 @@ static int m41t80_probe(struct i2c_client *client,
 		goto exit;
 	}
 
+	clientdata->features = id->driver_data;
+	i2c_set_clientdata(client, clientdata);
+
 	rtc = rtc_device_register(client->name, &client->dev,
 				  &m41t80_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
@@ -792,8 +795,6 @@ static int m41t80_probe(struct i2c_client *client,
 	}
 
 	clientdata->rtc = rtc;
-	clientdata->features = id->driver_data;
-	i2c_set_clientdata(client, clientdata);
 
 	/* Make sure HT (Halt Update) bit is cleared */
 	rc = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_HOUR);
diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c
index 174036dda786..3bc046f427e0 100644
--- a/drivers/rtc/rtc-max8925.c
+++ b/drivers/rtc/rtc-max8925.c
@@ -257,6 +257,10 @@ static int __devinit max8925_rtc_probe(struct platform_device *pdev)
 		goto out_irq;
 	}
 
+	dev_set_drvdata(&pdev->dev, info);
+	/* XXX - isn't this redundant? */
+	platform_set_drvdata(pdev, info);
+
 	info->rtc_dev = rtc_device_register("max8925-rtc", &pdev->dev,
 					&max8925_rtc_ops, THIS_MODULE);
 	ret = PTR_ERR(info->rtc_dev);
@@ -265,11 +269,9 @@ static int __devinit max8925_rtc_probe(struct platform_device *pdev)
 		goto out_rtc;
 	}
 
-	dev_set_drvdata(&pdev->dev, info);
-	platform_set_drvdata(pdev, info);
-
 	return 0;
 out_rtc:
+	platform_set_drvdata(pdev, NULL);
 	free_irq(chip->irq_base + MAX8925_IRQ_RTC_ALARM0, info);
 out_irq:
 	kfree(info);
diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c
index 3f7bc6b9fefa..2e48aa604273 100644
--- a/drivers/rtc/rtc-max8998.c
+++ b/drivers/rtc/rtc-max8998.c
@@ -265,6 +265,8 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev)
 	info->rtc = max8998->rtc;
 	info->irq = max8998->irq_base + MAX8998_IRQ_ALARM0;
 
+	platform_set_drvdata(pdev, info);
+
 	info->rtc_dev = rtc_device_register("max8998-rtc", &pdev->dev,
 			&max8998_rtc_ops, THIS_MODULE);
 
@@ -274,8 +276,6 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev)
 		goto out_rtc;
 	}
 
-	platform_set_drvdata(pdev, info);
-
 	ret = request_threaded_irq(info->irq, NULL, max8998_rtc_alarm_irq, 0,
 			"rtc-alarm0", info);
 
@@ -293,6 +293,7 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev)
 	return 0;
 
 out_rtc:
+	platform_set_drvdata(pdev, NULL);
 	kfree(info);
 	return ret;
 }
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index c42006469559..a1a278bc340d 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -349,11 +349,15 @@ static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_alarm_irq_request;
 
+	mc13xxx_unlock(mc13xxx);
+
 	priv->rtc = rtc_device_register(pdev->name,
 			&pdev->dev, &mc13xxx_rtc_ops, THIS_MODULE);
 	if (IS_ERR(priv->rtc)) {
 		ret = PTR_ERR(priv->rtc);
 
+		mc13xxx_lock(mc13xxx);
+
 		mc13xxx_irq_free(mc13xxx, MC13XXX_IRQ_TODA, priv);
 err_alarm_irq_request:
 
@@ -365,12 +369,12 @@ err_reset_irq_status:
 		mc13xxx_irq_free(mc13xxx, MC13XXX_IRQ_RTCRST, priv);
 err_reset_irq_request:
 
+		mc13xxx_unlock(mc13xxx);
+
 		platform_set_drvdata(pdev, NULL);
 		kfree(priv);
 	}
 
-	mc13xxx_unlock(mc13xxx);
-
 	return ret;
 }
 
@@ -401,6 +405,7 @@ const struct platform_device_id mc13xxx_rtc_idtable[] = {
 	}, {
 		.name = "mc13892-rtc",
 	},
+	{ }
 };
 
 static struct platform_driver mc13xxx_rtc_driver = {
diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c
index 67820626e18f..fcb113c11122 100644
--- a/drivers/rtc/rtc-msm6242.c
+++ b/drivers/rtc/rtc-msm6242.c
@@ -214,6 +214,7 @@ static int __init msm6242_rtc_probe(struct platform_device *dev)
 		error = -ENOMEM;
 		goto out_free_priv;
 	}
+	platform_set_drvdata(dev, priv);
 
 	rtc = rtc_device_register("rtc-msm6242", &dev->dev, &msm6242_rtc_ops,
 				  THIS_MODULE);
@@ -223,10 +224,10 @@ static int __init msm6242_rtc_probe(struct platform_device *dev)
 	}
 
 	priv->rtc = rtc;
-	platform_set_drvdata(dev, priv);
 	return 0;
 
 out_unmap:
+	platform_set_drvdata(dev, NULL);
 	iounmap(priv->regs);
 out_free_priv:
 	kfree(priv);
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 826ab64a8fa9..d814417bee8c 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -418,14 +418,6 @@ static int __init mxc_rtc_probe(struct platform_device *pdev)
 		goto exit_put_clk;
 	}
 
-	rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops,
-				  THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
-		goto exit_put_clk;
-	}
-
-	pdata->rtc = rtc;
 	platform_set_drvdata(pdev, pdata);
 
 	/* Configure and enable the RTC */
@@ -438,8 +430,19 @@ static int __init mxc_rtc_probe(struct platform_device *pdev)
 		pdata->irq = -1;
 	}
 
+	rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops,
+				  THIS_MODULE);
+	if (IS_ERR(rtc)) {
+		ret = PTR_ERR(rtc);
+		goto exit_clr_drvdata;
+	}
+
+	pdata->rtc = rtc;
+
 	return 0;
 
+exit_clr_drvdata:
+	platform_set_drvdata(pdev, NULL);
 exit_put_clk:
 	clk_disable(pdata->clk);
 	clk_put(pdata->clk);
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index de0dd7b1f146..bcae8dd41496 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -394,7 +394,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
 	return 0;
 
 fail2:
-	free_irq(omap_rtc_timer, NULL);
+	free_irq(omap_rtc_timer, rtc);
 fail1:
 	rtc_device_unregister(rtc);
 fail0:
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index a633abc42896..cd4f198cc2ef 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -151,6 +151,8 @@ static int __devinit pcap_rtc_probe(struct platform_device *pdev)
 
 	pcap_rtc->pcap = dev_get_drvdata(pdev->dev.parent);
 
+	platform_set_drvdata(pdev, pcap_rtc);
+
 	pcap_rtc->rtc = rtc_device_register("pcap", &pdev->dev,
 				  &pcap_rtc_ops, THIS_MODULE);
 	if (IS_ERR(pcap_rtc->rtc)) {
@@ -158,7 +160,6 @@ static int __devinit pcap_rtc_probe(struct platform_device *pdev)
 		goto fail_rtc;
 	}
 
-	platform_set_drvdata(pdev, pcap_rtc);
 
 	timer_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ);
 	alarm_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA);
@@ -177,6 +178,7 @@ fail_alarm:
 fail_timer:
 	rtc_device_unregister(pcap_rtc->rtc);
 fail_rtc:
+	platform_set_drvdata(pdev, NULL);
 	kfree(pcap_rtc);
 	return err;
 }
diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c
index 694da39b6dd2..359da6d020b9 100644
--- a/drivers/rtc/rtc-rp5c01.c
+++ b/drivers/rtc/rtc-rp5c01.c
@@ -249,15 +249,15 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 
 	spin_lock_init(&priv->lock);
 
+	platform_set_drvdata(dev, priv);
+
 	rtc = rtc_device_register("rtc-rp5c01", &dev->dev, &rp5c01_rtc_ops,
 				  THIS_MODULE);
 	if (IS_ERR(rtc)) {
 		error = PTR_ERR(rtc);
 		goto out_unmap;
 	}
-
 	priv->rtc = rtc;
-	platform_set_drvdata(dev, priv);
 
 	error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr);
 	if (error)
@@ -268,6 +268,7 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 out_unregister:
 	rtc_device_unregister(rtc);
 out_unmap:
+	platform_set_drvdata(dev, NULL);
 	iounmap(priv->regs);
 out_free_priv:
 	kfree(priv);
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 714964913e5e..16512ecae31a 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -46,6 +46,7 @@ static struct clk *rtc_clk;
 static void __iomem *s3c_rtc_base;
 static int s3c_rtc_alarmno = NO_IRQ;
 static int s3c_rtc_tickno  = NO_IRQ;
+static bool wake_en;
 static enum s3c_cpu_type s3c_rtc_cpu_type;
 
 static DEFINE_SPINLOCK(s3c_rtc_pie_lock);
@@ -336,7 +337,6 @@ static void s3c_rtc_release(struct device *dev)
 
 	/* do not clear AIE here, it may be needed for wake */
 
-	s3c_rtc_setpie(dev, 0);
 	free_irq(s3c_rtc_alarmno, rtc_dev);
 	free_irq(s3c_rtc_tickno, rtc_dev);
 }
@@ -408,7 +408,6 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
 	platform_set_drvdata(dev, NULL);
 	rtc_device_unregister(rtc);
 
-	s3c_rtc_setpie(&dev->dev, 0);
 	s3c_rtc_setaie(&dev->dev, 0);
 
 	clk_disable(rtc_clk);
@@ -564,8 +563,12 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state)
 	}
 	s3c_rtc_enable(pdev, 0);
 
-	if (device_may_wakeup(&pdev->dev))
-		enable_irq_wake(s3c_rtc_alarmno);
+	if (device_may_wakeup(&pdev->dev) && !wake_en) {
+		if (enable_irq_wake(s3c_rtc_alarmno) == 0)
+			wake_en = true;
+		else
+			dev_err(&pdev->dev, "enable_irq_wake failed\n");
+	}
 
 	return 0;
 }
@@ -581,8 +584,10 @@ static int s3c_rtc_resume(struct platform_device *pdev)
 		writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON);
 	}
 
-	if (device_may_wakeup(&pdev->dev))
+	if (device_may_wakeup(&pdev->dev) && wake_en) {
 		disable_irq_wake(s3c_rtc_alarmno);
+		wake_en = false;
+	}
 
 	return 0;
 }
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 4d2df2f76ea0..86b6f1cc1b10 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1742,11 +1742,20 @@ int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr)
 static inline int _dasd_term_running_cqr(struct dasd_device *device)
 {
 	struct dasd_ccw_req *cqr;
+	int rc;
 
 	if (list_empty(&device->ccw_queue))
 		return 0;
 	cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
-	return device->discipline->term_IO(cqr);
+	rc = device->discipline->term_IO(cqr);
+	if (!rc)
+		/*
+		 * CQR terminated because a more important request is pending.
+		 * Undo decreasing of retry counter because this is
+		 * not an error case.
+		 */
+		cqr->retries++;
+	return rc;
 }
 
 int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr)
@@ -2314,15 +2323,14 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 
 static int dasd_open(struct block_device *bdev, fmode_t mode)
 {
-	struct dasd_block *block = bdev->bd_disk->private_data;
 	struct dasd_device *base;
 	int rc;
 
-	if (!block)
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
 		return -ENODEV;
 
-	base = block->base;
-	atomic_inc(&block->open_count);
+	atomic_inc(&base->block->open_count);
 	if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) {
 		rc = -ENODEV;
 		goto unlock;
@@ -2355,21 +2363,28 @@ static int dasd_open(struct block_device *bdev, fmode_t mode)
 		goto out;
 	}
 
+	dasd_put_device(base);
 	return 0;
 
 out:
 	module_put(base->discipline->owner);
 unlock:
-	atomic_dec(&block->open_count);
+	atomic_dec(&base->block->open_count);
+	dasd_put_device(base);
 	return rc;
 }
 
 static int dasd_release(struct gendisk *disk, fmode_t mode)
 {
-	struct dasd_block *block = disk->private_data;
+	struct dasd_device *base;
 
-	atomic_dec(&block->open_count);
-	module_put(block->base->discipline->owner);
+	base = dasd_device_from_gendisk(disk);
+	if (!base)
+		return -ENODEV;
+
+	atomic_dec(&base->block->open_count);
+	module_put(base->discipline->owner);
+	dasd_put_device(base);
 	return 0;
 }
 
@@ -2378,20 +2393,20 @@ static int dasd_release(struct gendisk *disk, fmode_t mode)
  */
 static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct dasd_block *block;
 	struct dasd_device *base;
 
-	block = bdev->bd_disk->private_data;
-	if (!block)
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
 		return -ENODEV;
-	base = block->base;
 
 	if (!base->discipline ||
-	    !base->discipline->fill_geometry)
+	    !base->discipline->fill_geometry) {
+		dasd_put_device(base);
 		return -EINVAL;
-
-	base->discipline->fill_geometry(block, geo);
-	geo->start = get_start_sect(bdev) >> block->s2b_shift;
+	}
+	base->discipline->fill_geometry(base->block, geo);
+	geo->start = get_start_sect(bdev) >> base->block->s2b_shift;
+	dasd_put_device(base);
 	return 0;
 }
 
@@ -2528,7 +2543,6 @@ void dasd_generic_remove(struct ccw_device *cdev)
 	dasd_set_target_state(device, DASD_STATE_NEW);
 	/* dasd_delete_device destroys the device reference. */
 	block = device->block;
-	device->block = NULL;
 	dasd_delete_device(device);
 	/*
 	 * life cycle of block is bound to device, so delete it after
@@ -2650,7 +2664,6 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
 	dasd_set_target_state(device, DASD_STATE_NEW);
 	/* dasd_delete_device destroys the device reference. */
 	block = device->block;
-	device->block = NULL;
 	dasd_delete_device(device);
 	/*
 	 * life cycle of block is bound to device, so delete it after
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 42e1bf35f689..d71511c7850a 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -674,6 +674,36 @@ dasd_device_from_cdev(struct ccw_device *cdev)
 	return device;
 }
 
+void dasd_add_link_to_gendisk(struct gendisk *gdp, struct dasd_device *device)
+{
+	struct dasd_devmap *devmap;
+
+	devmap = dasd_find_busid(dev_name(&device->cdev->dev));
+	if (IS_ERR(devmap))
+		return;
+	spin_lock(&dasd_devmap_lock);
+	gdp->private_data = devmap;
+	spin_unlock(&dasd_devmap_lock);
+}
+
+struct dasd_device *dasd_device_from_gendisk(struct gendisk *gdp)
+{
+	struct dasd_device *device;
+	struct dasd_devmap *devmap;
+
+	if (!gdp->private_data)
+		return NULL;
+	device = NULL;
+	spin_lock(&dasd_devmap_lock);
+	devmap = gdp->private_data;
+	if (devmap && devmap->device) {
+		device = devmap->device;
+		dasd_get_device(device);
+	}
+	spin_unlock(&dasd_devmap_lock);
+	return device;
+}
+
 /*
  * SECTION: files in sysfs
  */
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 29143eda9dd9..85dddb1e4126 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -239,7 +239,6 @@ static void dasd_ext_handler(unsigned int ext_int_code,
 	addr_t ip;
 	int rc;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_DSD]++;
 	switch (ext_int_code >> 24) {
 	case DASD_DIAG_CODE_31BIT:
 		ip = (addr_t) param32;
@@ -250,6 +249,7 @@ static void dasd_ext_handler(unsigned int ext_int_code,
 	default:
 		return;
 	}
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_DSD]++;
 	if (!ip) {		/* no intparm: unsolicited interrupt */
 		DBF_EVENT(DBF_NOTICE, "%s", "caught unsolicited "
 			      "interrupt");
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index db8005d9f2fd..3ebdf5f92f8f 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2037,7 +2037,7 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device,
 		return;
 
 	/* summary unit check */
-	if ((sense[7] == 0x0D) &&
+	if ((sense[27] & DASD_SENSE_BIT_0) && (sense[7] == 0x0D) &&
 	    (scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK)) {
 		dasd_alias_handle_summary_unit_check(device, irb);
 		return;
@@ -2053,7 +2053,8 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device,
 	/* loss of device reservation is handled via base devices only
 	 * as alias devices may be used with several bases
 	 */
-	if (device->block && (sense[7] == 0x3F) &&
+	if (device->block && (sense[27] & DASD_SENSE_BIT_0) &&
+	    (sense[7] == 0x3F) &&
 	    (scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK) &&
 	    test_bit(DASD_FLAG_IS_RESERVED, &device->flags)) {
 		if (device->features & DASD_FEATURE_FAILONSLCK)
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 5505bc07e1e7..19a1ff03d65e 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -73,7 +73,7 @@ int dasd_gendisk_alloc(struct dasd_block *block)
 	if (base->features & DASD_FEATURE_READONLY ||
 	    test_bit(DASD_FLAG_DEVICE_RO, &base->flags))
 		set_disk_ro(gdp, 1);
-	gdp->private_data = block;
+	dasd_add_link_to_gendisk(gdp, base);
 	gdp->queue = block->request_queue;
 	block->gdp = gdp;
 	set_capacity(block->gdp, 0);
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index df9f6999411d..d1e4f2c1264c 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -686,6 +686,9 @@ struct dasd_device *dasd_device_from_cdev(struct ccw_device *);
 struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *);
 struct dasd_device *dasd_device_from_devindex(int);
 
+void dasd_add_link_to_gendisk(struct gendisk *, struct dasd_device *);
+struct dasd_device *dasd_device_from_gendisk(struct gendisk *);
+
 int dasd_parse(void);
 int dasd_busid_known(const char *);
 
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 26075e95b1ba..72261e4c516d 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -42,16 +42,22 @@ dasd_ioctl_api_version(void __user *argp)
 static int
 dasd_ioctl_enable(struct block_device *bdev)
 {
-	struct dasd_block *block = bdev->bd_disk->private_data;
+	struct dasd_device *base;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	dasd_enable_device(block->base);
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
+		return -ENODEV;
+
+	dasd_enable_device(base);
 	/* Formatting the dasd device can change the capacity. */
 	mutex_lock(&bdev->bd_mutex);
-	i_size_write(bdev->bd_inode, (loff_t)get_capacity(block->gdp) << 9);
+	i_size_write(bdev->bd_inode,
+		     (loff_t)get_capacity(base->block->gdp) << 9);
 	mutex_unlock(&bdev->bd_mutex);
+	dasd_put_device(base);
 	return 0;
 }
 
@@ -62,11 +68,14 @@ dasd_ioctl_enable(struct block_device *bdev)
 static int
 dasd_ioctl_disable(struct block_device *bdev)
 {
-	struct dasd_block *block = bdev->bd_disk->private_data;
+	struct dasd_device *base;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
+		return -ENODEV;
 	/*
 	 * Man this is sick. We don't do a real disable but only downgrade
 	 * the device to DASD_STATE_BASIC. The reason is that dasdfmt uses
@@ -75,7 +84,7 @@ dasd_ioctl_disable(struct block_device *bdev)
 	 * using the BIODASDFMT ioctl. Therefore the correct state for the
 	 * device is DASD_STATE_BASIC that allows to do basic i/o.
 	 */
-	dasd_set_target_state(block->base, DASD_STATE_BASIC);
+	dasd_set_target_state(base, DASD_STATE_BASIC);
 	/*
 	 * Set i_size to zero, since read, write, etc. check against this
 	 * value.
@@ -83,6 +92,7 @@ dasd_ioctl_disable(struct block_device *bdev)
 	mutex_lock(&bdev->bd_mutex);
 	i_size_write(bdev->bd_inode, 0);
 	mutex_unlock(&bdev->bd_mutex);
+	dasd_put_device(base);
 	return 0;
 }
 
@@ -191,26 +201,36 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata)
 static int
 dasd_ioctl_format(struct block_device *bdev, void __user *argp)
 {
-	struct dasd_block *block = bdev->bd_disk->private_data;
+	struct dasd_device *base;
 	struct format_data_t fdata;
+	int rc;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 	if (!argp)
 		return -EINVAL;
-
-	if (block->base->features & DASD_FEATURE_READONLY ||
-	    test_bit(DASD_FLAG_DEVICE_RO, &block->base->flags))
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
+		return -ENODEV;
+	if (base->features & DASD_FEATURE_READONLY ||
+	    test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) {
+		dasd_put_device(base);
 		return -EROFS;
-	if (copy_from_user(&fdata, argp, sizeof(struct format_data_t)))
+	}
+	if (copy_from_user(&fdata, argp, sizeof(struct format_data_t))) {
+		dasd_put_device(base);
 		return -EFAULT;
+	}
 	if (bdev != bdev->bd_contains) {
 		pr_warning("%s: The specified DASD is a partition and cannot "
 			   "be formatted\n",
-			   dev_name(&block->base->cdev->dev));
+			   dev_name(&base->cdev->dev));
+		dasd_put_device(base);
 		return -EINVAL;
 	}
-	return dasd_format(block, &fdata);
+	rc = dasd_format(base->block, &fdata);
+	dasd_put_device(base);
+	return rc;
 }
 
 #ifdef CONFIG_DASD_PROFILE
@@ -340,8 +360,8 @@ static int dasd_ioctl_information(struct dasd_block *block,
 static int
 dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp)
 {
-	struct dasd_block *block =  bdev->bd_disk->private_data;
-	int intval;
+	struct dasd_device *base;
+	int intval, rc;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -350,10 +370,17 @@ dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp)
 		return -EINVAL;
 	if (get_user(intval, (int __user *)argp))
 		return -EFAULT;
-	if (!intval && test_bit(DASD_FLAG_DEVICE_RO, &block->base->flags))
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
+		return -ENODEV;
+	if (!intval && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) {
+		dasd_put_device(base);
 		return -EROFS;
+	}
 	set_disk_ro(bdev->bd_disk, intval);
-	return dasd_set_feature(block->base->cdev, DASD_FEATURE_READONLY, intval);
+	rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, intval);
+	dasd_put_device(base);
+	return rc;
 }
 
 static int dasd_ioctl_readall_cmb(struct dasd_block *block, unsigned int cmd,
@@ -372,59 +399,78 @@ static int dasd_ioctl_readall_cmb(struct dasd_block *block, unsigned int cmd,
 int dasd_ioctl(struct block_device *bdev, fmode_t mode,
 	       unsigned int cmd, unsigned long arg)
 {
-	struct dasd_block *block = bdev->bd_disk->private_data;
+	struct dasd_block *block;
+	struct dasd_device *base;
 	void __user *argp;
+	int rc;
 
 	if (is_compat_task())
 		argp = compat_ptr(arg);
 	else
 		argp = (void __user *)arg;
 
-	if (!block)
-                return -ENODEV;
-
 	if ((_IOC_DIR(cmd) != _IOC_NONE) && !arg) {
 		PRINT_DEBUG("empty data ptr");
 		return -EINVAL;
 	}
 
+	base = dasd_device_from_gendisk(bdev->bd_disk);
+	if (!base)
+		return -ENODEV;
+	block = base->block;
+	rc = 0;
 	switch (cmd) {
 	case BIODASDDISABLE:
-		return dasd_ioctl_disable(bdev);
+		rc = dasd_ioctl_disable(bdev);
+		break;
 	case BIODASDENABLE:
-		return dasd_ioctl_enable(bdev);
+		rc = dasd_ioctl_enable(bdev);
+		break;
 	case BIODASDQUIESCE:
-		return dasd_ioctl_quiesce(block);
+		rc = dasd_ioctl_quiesce(block);
+		break;
 	case BIODASDRESUME:
-		return dasd_ioctl_resume(block);
+		rc = dasd_ioctl_resume(block);
+		break;
 	case BIODASDFMT:
-		return dasd_ioctl_format(bdev, argp);
+		rc = dasd_ioctl_format(bdev, argp);
+		break;
 	case BIODASDINFO:
-		return dasd_ioctl_information(block, cmd, argp);
+		rc = dasd_ioctl_information(block, cmd, argp);
+		break;
 	case BIODASDINFO2:
-		return dasd_ioctl_information(block, cmd, argp);
+		rc = dasd_ioctl_information(block, cmd, argp);
+		break;
 	case BIODASDPRRD:
-		return dasd_ioctl_read_profile(block, argp);
+		rc = dasd_ioctl_read_profile(block, argp);
+		break;
 	case BIODASDPRRST:
-		return dasd_ioctl_reset_profile(block);
+		rc = dasd_ioctl_reset_profile(block);
+		break;
 	case BLKROSET:
-		return dasd_ioctl_set_ro(bdev, argp);
+		rc = dasd_ioctl_set_ro(bdev, argp);
+		break;
 	case DASDAPIVER:
-		return dasd_ioctl_api_version(argp);
+		rc = dasd_ioctl_api_version(argp);
+		break;
 	case BIODASDCMFENABLE:
-		return enable_cmf(block->base->cdev);
+		rc = enable_cmf(base->cdev);
+		break;
 	case BIODASDCMFDISABLE:
-		return disable_cmf(block->base->cdev);
+		rc = disable_cmf(base->cdev);
+		break;
 	case BIODASDREADALLCMB:
-		return dasd_ioctl_readall_cmb(block, cmd, argp);
+		rc = dasd_ioctl_readall_cmb(block, cmd, argp);
+		break;
 	default:
 		/* if the discipline has an ioctl method try it. */
-		if (block->base->discipline->ioctl) {
-			int rval = block->base->discipline->ioctl(block, cmd, argp);
-			if (rval != -ENOIOCTLCMD)
-				return rval;
-		}
-
-		return -EINVAL;
+		if (base->discipline->ioctl) {
+			rc = base->discipline->ioctl(block, cmd, argp);
+			if (rc == -ENOIOCTLCMD)
+				rc = -EINVAL;
+		} else
+			rc = -EINVAL;
 	}
+	dasd_put_device(base);
+	return rc;
 }
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 4b60ede07f0e..be55fb2b1b1c 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -518,6 +518,8 @@ static void __init insert_increment(u16 rn, int standby, int assigned)
 		return;
 	new_incr->rn = rn;
 	new_incr->standby = standby;
+	if (!standby)
+		new_incr->usecount = 1;
 	last_rn = 0;
 	prev = &sclp_mem_list;
 	list_for_each_entry(incr, &sclp_mem_list, list) {
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 83cea9a55e2f..1b3924c2fffd 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -236,7 +236,6 @@ tapeblock_setup_device(struct tape_device * device)
 	disk->major = tapeblock_major;
 	disk->first_minor = device->first_minor;
 	disk->fops = &tapeblock_fops;
-	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	disk->private_data = tape_get_device(device);
 	disk->queue = blkdat->request_queue;
 	set_capacity(disk, 0);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index c532ba929ccd..e8f267eb8887 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -407,8 +407,11 @@ static inline void account_sbals(struct qdio_q *q, int count)
 	q->q_stats.nr_sbals[pos]++;
 }
 
-static void announce_buffer_error(struct qdio_q *q, int count)
+static void process_buffer_error(struct qdio_q *q, int count)
 {
+	unsigned char state = (q->is_input_q) ? SLSB_P_INPUT_NOT_INIT :
+					SLSB_P_OUTPUT_NOT_INIT;
+
 	q->qdio_error |= QDIO_ERROR_SLSB_STATE;
 
 	/* special handling for no target buffer empty */
@@ -426,6 +429,12 @@ static void announce_buffer_error(struct qdio_q *q, int count)
 	DBF_ERROR("F14:%2x F15:%2x",
 		  q->sbal[q->first_to_check]->element[14].flags & 0xff,
 		  q->sbal[q->first_to_check]->element[15].flags & 0xff);
+
+	/*
+	 * Interrupts may be avoided as long as the error is present
+	 * so change the buffer state immediately to avoid starvation.
+	 */
+	set_buf_states(q, q->first_to_check, state, count);
 }
 
 static inline void inbound_primed(struct qdio_q *q, int count)
@@ -506,8 +515,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q)
 			account_sbals(q, count);
 		break;
 	case SLSB_P_INPUT_ERROR:
-		announce_buffer_error(q, count);
-		/* process the buffer, the upper layer will take care of it */
+		process_buffer_error(q, count);
 		q->first_to_check = add_buf(q->first_to_check, count);
 		atomic_sub(count, &q->nr_buf_used);
 		if (q->irq_ptr->perf_stat_enabled)
@@ -677,8 +685,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q)
 			account_sbals(q, count);
 		break;
 	case SLSB_P_OUTPUT_ERROR:
-		announce_buffer_error(q, count);
-		/* process the buffer, the upper layer will take care of it */
+		process_buffer_error(q, count);
 		q->first_to_check = add_buf(q->first_to_check, count);
 		atomic_sub(count, &q->nr_buf_used);
 		if (q->irq_ptr->perf_stat_enabled)
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 414427d64a8f..607998f0b7d8 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -381,10 +381,10 @@ static void kvm_extint_handler(unsigned int ext_int_code,
 	u16 subcode;
 	u32 param;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++;
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
 		return;
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++;
 
 	/* The LSB might be overloaded, we have to mask it */
 	vq = (struct virtqueue *)(param64 & ~1UL);
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index da7b9887ec48..f980600f78a8 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -75,8 +75,10 @@ MODULE_AUTHOR("Nick Cheng <support@areca.com.tw>");
 MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/16xx/1880) SATA/SAS RAID Host Bus Adapter");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(ARCMSR_DRIVER_VERSION);
-static int sleeptime = 10;
-static int retrycount = 12;
+
+#define	ARCMSR_SLEEPTIME	10
+#define	ARCMSR_RETRYCOUNT	12
+
 wait_queue_head_t wait_q;
 static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb,
 					struct scsi_cmnd *cmd);
@@ -171,24 +173,6 @@ static struct pci_driver arcmsr_pci_driver = {
 ****************************************************************************
 ****************************************************************************
 */
-int arcmsr_sleep_for_bus_reset(struct scsi_cmnd *cmd)
-{
-		struct Scsi_Host *shost = NULL;
-		int i, isleep;
-		shost = cmd->device->host;
-		isleep = sleeptime / 10;
-		if (isleep > 0) {
-			for (i = 0; i < isleep; i++) {
-				msleep(10000);
-			}
-		}
-
-		isleep = sleeptime % 10;
-		if (isleep > 0) {
-			msleep(isleep*1000);
-		}
-		return 0;
-}
 
 static void arcmsr_free_hbb_mu(struct AdapterControlBlock *acb)
 {
@@ -323,66 +307,64 @@ static void arcmsr_define_adapter_type(struct AdapterControlBlock *acb)
 
 	default: acb->adapter_type = ACB_ADAPTER_TYPE_A;
 	}
-}	
+}
 
 static uint8_t arcmsr_hba_wait_msgint_ready(struct AdapterControlBlock *acb)
 {
 	struct MessageUnit_A __iomem *reg = acb->pmuA;
-	uint32_t Index;
-	uint8_t Retries = 0x00;
-	do {
-		for (Index = 0; Index < 100; Index++) {
-			if (readl(&reg->outbound_intstatus) &
-					ARCMSR_MU_OUTBOUND_MESSAGE0_INT) {
-				writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT,
-					&reg->outbound_intstatus);
-				return true;
-			}
-			msleep(10);
-		}/*max 1 seconds*/
+	int i;
+
+	for (i = 0; i < 2000; i++) {
+		if (readl(&reg->outbound_intstatus) &
+				ARCMSR_MU_OUTBOUND_MESSAGE0_INT) {
+			writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT,
+				&reg->outbound_intstatus);
+			return true;
+		}
+		msleep(10);
+	} /* max 20 seconds */
 
-	} while (Retries++ < 20);/*max 20 sec*/
 	return false;
 }
 
 static uint8_t arcmsr_hbb_wait_msgint_ready(struct AdapterControlBlock *acb)
 {
 	struct MessageUnit_B *reg = acb->pmuB;
-	uint32_t Index;
-	uint8_t Retries = 0x00;
-	do {
-		for (Index = 0; Index < 100; Index++) {
-			if (readl(reg->iop2drv_doorbell)
-				& ARCMSR_IOP2DRV_MESSAGE_CMD_DONE) {
-				writel(ARCMSR_MESSAGE_INT_CLEAR_PATTERN
-					, reg->iop2drv_doorbell);
-				writel(ARCMSR_DRV2IOP_END_OF_INTERRUPT, reg->drv2iop_doorbell);
-				return true;
-			}
-			msleep(10);
-		}/*max 1 seconds*/
+	int i;
+
+	for (i = 0; i < 2000; i++) {
+		if (readl(reg->iop2drv_doorbell)
+			& ARCMSR_IOP2DRV_MESSAGE_CMD_DONE) {
+			writel(ARCMSR_MESSAGE_INT_CLEAR_PATTERN,
+					reg->iop2drv_doorbell);
+			writel(ARCMSR_DRV2IOP_END_OF_INTERRUPT,
+					reg->drv2iop_doorbell);
+			return true;
+		}
+		msleep(10);
+	} /* max 20 seconds */
 
-	} while (Retries++ < 20);/*max 20 sec*/
 	return false;
 }
 
 static uint8_t arcmsr_hbc_wait_msgint_ready(struct AdapterControlBlock *pACB)
 {
 	struct MessageUnit_C *phbcmu = (struct MessageUnit_C *)pACB->pmuC;
-	unsigned char Retries = 0x00;
-	uint32_t Index;
-	do {
-		for (Index = 0; Index < 100; Index++) {
-			if (readl(&phbcmu->outbound_doorbell) & ARCMSR_HBCMU_IOP2DRV_MESSAGE_CMD_DONE) {
-				writel(ARCMSR_HBCMU_IOP2DRV_MESSAGE_CMD_DONE_DOORBELL_CLEAR, &phbcmu->outbound_doorbell_clear);/*clear interrupt*/
-				return true;
-			}
-			/* one us delay	*/
-			msleep(10);
-		} /*max 1 seconds*/
-	} while (Retries++ < 20); /*max 20 sec*/
+	int i;
+
+	for (i = 0; i < 2000; i++) {
+		if (readl(&phbcmu->outbound_doorbell)
+				& ARCMSR_HBCMU_IOP2DRV_MESSAGE_CMD_DONE) {
+			writel(ARCMSR_HBCMU_IOP2DRV_MESSAGE_CMD_DONE_DOORBELL_CLEAR,
+				&phbcmu->outbound_doorbell_clear); /*clear interrupt*/
+			return true;
+		}
+		msleep(10);
+	} /* max 20 seconds */
+
 	return false;
 }
+
 static void arcmsr_flush_hba_cache(struct AdapterControlBlock *acb)
 {
 	struct MessageUnit_A __iomem *reg = acb->pmuA;
@@ -459,10 +441,11 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
 	struct CommandControlBlock *ccb_tmp;
 	int i = 0, j = 0;
 	dma_addr_t cdb_phyaddr;
-	unsigned long roundup_ccbsize = 0, offset;
+	unsigned long roundup_ccbsize;
 	unsigned long max_xfer_len;
 	unsigned long max_sg_entrys;
 	uint32_t  firm_config_version;
+
 	for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
 		for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
 			acb->devstate[i][j] = ARECA_RAID_GONE;
@@ -472,23 +455,20 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
 	firm_config_version = acb->firm_cfg_version;
 	if((firm_config_version & 0xFF) >= 3){
 		max_xfer_len = (ARCMSR_CDB_SG_PAGE_LENGTH << ((firm_config_version >> 8) & 0xFF)) * 1024;/* max 4M byte */
-		max_sg_entrys = (max_xfer_len/4096);	
+		max_sg_entrys = (max_xfer_len/4096);
 	}
 	acb->host->max_sectors = max_xfer_len/512;
 	acb->host->sg_tablesize = max_sg_entrys;
 	roundup_ccbsize = roundup(sizeof(struct CommandControlBlock) + (max_sg_entrys - 1) * sizeof(struct SG64ENTRY), 32);
-	acb->uncache_size = roundup_ccbsize * ARCMSR_MAX_FREECCB_NUM + 32;
+	acb->uncache_size = roundup_ccbsize * ARCMSR_MAX_FREECCB_NUM;
 	dma_coherent = dma_alloc_coherent(&pdev->dev, acb->uncache_size, &dma_coherent_handle, GFP_KERNEL);
 	if(!dma_coherent){
-		printk(KERN_NOTICE "arcmsr%d: dma_alloc_coherent got error \n", acb->host->host_no);
+		printk(KERN_NOTICE "arcmsr%d: dma_alloc_coherent got error\n", acb->host->host_no);
 		return -ENOMEM;
 	}
 	acb->dma_coherent = dma_coherent;
 	acb->dma_coherent_handle = dma_coherent_handle;
 	memset(dma_coherent, 0, acb->uncache_size);
-	offset = roundup((unsigned long)dma_coherent, 32) - (unsigned long)dma_coherent;
-	dma_coherent_handle = dma_coherent_handle + offset;
-	dma_coherent = (struct CommandControlBlock *)dma_coherent + offset;
 	ccb_tmp = dma_coherent;
 	acb->vir2phy_offset = (unsigned long)dma_coherent - (unsigned long)dma_coherent_handle;
 	for(i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++){
@@ -2602,12 +2582,8 @@ static int arcmsr_iop_confirm(struct AdapterControlBlock *acb)
 		if (cdb_phyaddr_hi32 != 0) {
 			struct MessageUnit_C *reg = (struct MessageUnit_C *)acb->pmuC;
 
-			if (cdb_phyaddr_hi32 != 0) {
-				unsigned char Retries = 0x00;
-				do {
-					printk(KERN_NOTICE "arcmsr%d: cdb_phyaddr_hi32=0x%x \n", acb->adapter_index, cdb_phyaddr_hi32);
-				} while (Retries++ < 100);
-			}
+			printk(KERN_NOTICE "arcmsr%d: cdb_phyaddr_hi32=0x%x\n",
+					acb->adapter_index, cdb_phyaddr_hi32);
 			writel(ARCMSR_SIGNATURE_SET_CONFIG, &reg->msgcode_rwbuffer[0]);
 			writel(cdb_phyaddr_hi32, &reg->msgcode_rwbuffer[1]);
 			writel(ARCMSR_INBOUND_MESG0_SET_CONFIG, &reg->inbound_msgaddr0);
@@ -2955,12 +2931,12 @@ static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
 				arcmsr_hardware_reset(acb);
 				acb->acb_flags &= ~ACB_F_IOP_INITED;
 sleep_again:
-				arcmsr_sleep_for_bus_reset(cmd);
+				ssleep(ARCMSR_SLEEPTIME);
 				if ((readl(&reg->outbound_msgaddr1) & ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK) == 0) {
-					printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, retry=%d \n", acb->host->host_no, retry_count);
-					if (retry_count > retrycount) {
+					printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, retry=%d\n", acb->host->host_no, retry_count);
+					if (retry_count > ARCMSR_RETRYCOUNT) {
 						acb->fw_flag = FW_DEADLOCK;
-						printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, RETRY TERMINATED!! \n", acb->host->host_no);
+						printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, RETRY TERMINATED!!\n", acb->host->host_no);
 						return FAILED;
 					}
 					retry_count++;
@@ -3025,12 +3001,12 @@ sleep_again:
 				arcmsr_hardware_reset(acb);
 				acb->acb_flags &= ~ACB_F_IOP_INITED;
 sleep:
-				arcmsr_sleep_for_bus_reset(cmd);
+				ssleep(ARCMSR_SLEEPTIME);
 				if ((readl(&reg->host_diagnostic) & 0x04) != 0) {
-					printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, retry=%d \n", acb->host->host_no, retry_count);
-					if (retry_count > retrycount) {
+					printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, retry=%d\n", acb->host->host_no, retry_count);
+					if (retry_count > ARCMSR_RETRYCOUNT) {
 						acb->fw_flag = FW_DEADLOCK;
-						printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, RETRY TERMINATED!! \n", acb->host->host_no);
+						printk(KERN_ERR "arcmsr%d: waiting for hw bus reset return, RETRY TERMINATED!!\n", acb->host->host_no);
 						return FAILED;
 					}
 					retry_count++;
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 1cb8a5e85c7f..1d7b976c850f 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,11 +8,11 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@serverengines.com
+ * linux-drivers@emulex.com
  *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #ifndef BEISCSI_H
diff --git a/drivers/scsi/be2iscsi/be_cmds.c b/drivers/scsi/be2iscsi/be_cmds.c
index ad246369d373..b8a82f2c62c8 100644
--- a/drivers/scsi/be2iscsi/be_cmds.c
+++ b/drivers/scsi/be2iscsi/be_cmds.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,11 +8,11 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@serverengines.com
+ * linux-drivers@emulex.com
  *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #include "be.h"
@@ -458,6 +458,7 @@ void be_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr,
 	req_hdr->opcode = opcode;
 	req_hdr->subsystem = subsystem;
 	req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr));
+	req_hdr->timeout = 120;
 }
 
 static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages,
diff --git a/drivers/scsi/be2iscsi/be_cmds.h b/drivers/scsi/be2iscsi/be_cmds.h
index fbd1dc2c15f7..497eb29e5c9e 100644
--- a/drivers/scsi/be2iscsi/be_cmds.h
+++ b/drivers/scsi/be2iscsi/be_cmds.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -8,11 +8,11 @@
  * Public License is included in this distribution in the file called COPYING.
  *
  * Contact Information:
- * linux-drivers@serverengines.com
+ * linux-drivers@emulex.com
  *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #ifndef BEISCSI_CMDS_H
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index 868cc5590145..3cad10605023 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,15 +7,14 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #include <scsi/libiscsi.h>
diff --git a/drivers/scsi/be2iscsi/be_iscsi.h b/drivers/scsi/be2iscsi/be_iscsi.h
index 9c532797c29e..ff60b7fd92d6 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.h
+++ b/drivers/scsi/be2iscsi/be_iscsi.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,15 +7,14 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #ifndef _BE_ISCSI_
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 24e20ba9633c..cea9b275965c 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,16 +7,16 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- *  ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
+
 #include <linux/reboot.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
@@ -420,7 +420,8 @@ static int beiscsi_setup_boot_info(struct beiscsi_hba *phba)
 	return 0;
 
 free_kset:
-	iscsi_boot_destroy_kset(phba->boot_kset);
+	if (phba->boot_kset)
+		iscsi_boot_destroy_kset(phba->boot_kset);
 	return -ENOMEM;
 }
 
@@ -3464,23 +3465,23 @@ static void hwi_enable_intr(struct beiscsi_hba *phba)
 	addr = (u8 __iomem *) ((u8 __iomem *) ctrl->pcicfg +
 			PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET);
 	reg = ioread32(addr);
-	SE_DEBUG(DBG_LVL_8, "reg =x%08x\n", reg);
 
 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 	if (!enabled) {
 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 		SE_DEBUG(DBG_LVL_8, "reg =x%08x addr=%p\n", reg, addr);
 		iowrite32(reg, addr);
-		if (!phba->msix_enabled) {
-			eq = &phwi_context->be_eq[0].q;
+	}
+
+	if (!phba->msix_enabled) {
+		eq = &phwi_context->be_eq[0].q;
+		SE_DEBUG(DBG_LVL_8, "eq->id=%d\n", eq->id);
+		hwi_ring_eq_db(phba, eq->id, 0, 0, 1, 1);
+	} else {
+		for (i = 0; i <= phba->num_cpus; i++) {
+			eq = &phwi_context->be_eq[i].q;
 			SE_DEBUG(DBG_LVL_8, "eq->id=%d\n", eq->id);
 			hwi_ring_eq_db(phba, eq->id, 0, 0, 1, 1);
-		} else {
-			for (i = 0; i <= phba->num_cpus; i++) {
-				eq = &phwi_context->be_eq[i].q;
-				SE_DEBUG(DBG_LVL_8, "eq->id=%d\n", eq->id);
-				hwi_ring_eq_db(phba, eq->id, 0, 0, 1, 1);
-			}
 		}
 	}
 }
@@ -4019,12 +4020,17 @@ static int beiscsi_mtask(struct iscsi_task *task)
 		hwi_write_buffer(pwrb, task);
 		break;
 	case ISCSI_OP_NOOP_OUT:
-		AMAP_SET_BITS(struct amap_iscsi_wrb, type, pwrb,
-			      INI_RD_CMD);
-		if (task->hdr->ttt == ISCSI_RESERVED_TAG)
+		if (task->hdr->ttt != ISCSI_RESERVED_TAG) {
+			AMAP_SET_BITS(struct amap_iscsi_wrb, type, pwrb,
+				      TGT_DM_CMD);
+			AMAP_SET_BITS(struct amap_iscsi_wrb, cmdsn_itt,
+				      pwrb, 0);
 			AMAP_SET_BITS(struct amap_iscsi_wrb, dmsg, pwrb, 0);
-		else
+		} else {
+			AMAP_SET_BITS(struct amap_iscsi_wrb, type, pwrb,
+				      INI_RD_CMD);
 			AMAP_SET_BITS(struct amap_iscsi_wrb, dmsg, pwrb, 1);
+		}
 		hwi_write_buffer(pwrb, task);
 		break;
 	case ISCSI_OP_TEXT:
@@ -4144,10 +4150,11 @@ static void beiscsi_remove(struct pci_dev *pcidev)
 			    phba->ctrl.mbox_mem_alloced.size,
 			    phba->ctrl.mbox_mem_alloced.va,
 			    phba->ctrl.mbox_mem_alloced.dma);
+	if (phba->boot_kset)
+		iscsi_boot_destroy_kset(phba->boot_kset);
 	iscsi_host_remove(phba->shost);
 	pci_dev_put(phba->pcidev);
 	iscsi_host_free(phba->shost);
-	iscsi_boot_destroy_kset(phba->boot_kset);
 }
 
 static void beiscsi_msix_enable(struct beiscsi_hba *phba)
diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 90eb74f6bcab..081c171a1ed6 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,15 +7,14 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #ifndef _BEISCSI_MAIN_
@@ -35,7 +34,7 @@
 
 #include "be.h"
 #define DRV_NAME		"be2iscsi"
-#define BUILD_STR		"2.0.549.0"
+#define BUILD_STR		"2.103.298.0"
 #define BE_NAME			"ServerEngines BladeEngine2" \
 				"Linux iSCSI Driver version" BUILD_STR
 #define DRV_DESC		BE_NAME " " "Driver"
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index 877324fc594c..44762cfa3e12 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,15 +7,14 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #include "be_mgmt.h"
@@ -203,8 +202,8 @@ int mgmt_epfw_cleanup(struct beiscsi_hba *phba, unsigned short chute)
 			   OPCODE_COMMON_ISCSI_CLEANUP, sizeof(*req));
 
 	req->chute = chute;
-	req->hdr_ring_id = 0;
-	req->data_ring_id = 0;
+	req->hdr_ring_id = cpu_to_le16(HWI_GET_DEF_HDRQ_ID(phba));
+	req->data_ring_id = cpu_to_le16(HWI_GET_DEF_BUFQ_ID(phba));
 
 	status =  be_mcc_notify_wait(phba);
 	if (status)
diff --git a/drivers/scsi/be2iscsi/be_mgmt.h b/drivers/scsi/be2iscsi/be_mgmt.h
index b9acedf78653..08428824ace2 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.h
+++ b/drivers/scsi/be2iscsi/be_mgmt.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2005 - 2010 ServerEngines
+ * Copyright (C) 2005 - 2011 Emulex
  * All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -7,15 +7,14 @@
  * as published by the Free Software Foundation.  The full GNU General
  * Public License is included in this distribution in the file called COPYING.
  *
- * Written by: Jayamohan Kallickal (jayamohank@serverengines.com)
+ * Written by: Jayamohan Kallickal (jayamohan.kallickal@emulex.com)
  *
  * Contact Information:
- * linux-drivers@serverengines.com
- *
- * ServerEngines
- * 209 N. Fair Oaks Ave
- * Sunnyvale, CA 94085
+ * linux-drivers@emulex.com
  *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
  */
 
 #ifndef _BEISCSI_MGMT_
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 0fd510a01561..59b5e9b61d71 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -57,9 +57,19 @@ int		pcie_max_read_reqsz;
 int		bfa_debugfs_enable = 1;
 int		msix_disable_cb = 0, msix_disable_ct = 0;
 
+/* Firmware related */
 u32	bfi_image_ct_fc_size, bfi_image_ct_cna_size, bfi_image_cb_fc_size;
 u32     *bfi_image_ct_fc, *bfi_image_ct_cna, *bfi_image_cb_fc;
 
+#define BFAD_FW_FILE_CT_FC      "ctfw_fc.bin"
+#define BFAD_FW_FILE_CT_CNA     "ctfw_cna.bin"
+#define BFAD_FW_FILE_CB_FC      "cbfw_fc.bin"
+
+static u32 *bfad_load_fwimg(struct pci_dev *pdev);
+static void bfad_free_fwimg(void);
+static void bfad_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
+		u32 *bfi_image_size, char *fw_name);
+
 static const char *msix_name_ct[] = {
 	"cpe0", "cpe1", "cpe2", "cpe3",
 	"rme0", "rme1", "rme2", "rme3",
@@ -222,6 +232,9 @@ bfad_sm_created(struct bfad_s *bfad, enum bfad_sm_event event)
 		if ((bfad->bfad_flags & BFAD_HAL_INIT_DONE)) {
 			bfa_sm_send_event(bfad, BFAD_E_INIT_SUCCESS);
 		} else {
+			printk(KERN_WARNING
+				"bfa %s: bfa init failed\n",
+				bfad->pci_name);
 			bfad->bfad_flags |= BFAD_HAL_INIT_FAIL;
 			bfa_sm_send_event(bfad, BFAD_E_INIT_FAILED);
 		}
@@ -991,10 +1004,6 @@ bfad_cfg_pport(struct bfad_s *bfad, enum bfa_lport_role role)
 		bfad->pport.roles |= BFA_LPORT_ROLE_FCP_IM;
 	}
 
-	/* Setup the debugfs node for this scsi_host */
-	if (bfa_debugfs_enable)
-		bfad_debugfs_init(&bfad->pport);
-
 	bfad->bfad_flags |= BFAD_CFG_PPORT_DONE;
 
 out:
@@ -1004,10 +1013,6 @@ out:
 void
 bfad_uncfg_pport(struct bfad_s *bfad)
 {
-	/* Remove the debugfs node for this scsi_host */
-	kfree(bfad->regdata);
-	bfad_debugfs_exit(&bfad->pport);
-
 	if ((supported_fc4s & BFA_LPORT_ROLE_FCP_IM) &&
 	    (bfad->pport.roles & BFA_LPORT_ROLE_FCP_IM)) {
 		bfad_im_scsi_host_free(bfad, bfad->pport.im_port);
@@ -1389,6 +1394,10 @@ bfad_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 	bfad->pport.bfad = bfad;
 	INIT_LIST_HEAD(&bfad->pbc_vport_list);
 
+	/* Setup the debugfs node for this bfad */
+	if (bfa_debugfs_enable)
+		bfad_debugfs_init(&bfad->pport);
+
 	retval = bfad_drv_init(bfad);
 	if (retval != BFA_STATUS_OK)
 		goto out_drv_init_failure;
@@ -1404,6 +1413,9 @@ out_bfad_sm_failure:
 	bfa_detach(&bfad->bfa);
 	bfad_hal_mem_release(bfad);
 out_drv_init_failure:
+	/* Remove the debugfs node for this bfad */
+	kfree(bfad->regdata);
+	bfad_debugfs_exit(&bfad->pport);
 	mutex_lock(&bfad_mutex);
 	bfad_inst--;
 	list_del(&bfad->list_entry);
@@ -1445,6 +1457,10 @@ bfad_pci_remove(struct pci_dev *pdev)
 	spin_unlock_irqrestore(&bfad->bfad_lock, flags);
 	bfad_hal_mem_release(bfad);
 
+	/* Remove the debugfs node for this bfad */
+	kfree(bfad->regdata);
+	bfad_debugfs_exit(&bfad->pport);
+
 	/* Cleaning the BFAD instance */
 	mutex_lock(&bfad_mutex);
 	bfad_inst--;
@@ -1550,7 +1566,7 @@ bfad_exit(void)
 }
 
 /* Firmware handling */
-u32 *
+static void
 bfad_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
 		u32 *bfi_image_size, char *fw_name)
 {
@@ -1558,27 +1574,25 @@ bfad_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
 
 	if (request_firmware(&fw, fw_name, &pdev->dev)) {
 		printk(KERN_ALERT "Can't locate firmware %s\n", fw_name);
-		goto error;
+		*bfi_image = NULL;
+		goto out;
 	}
 
 	*bfi_image = vmalloc(fw->size);
 	if (NULL == *bfi_image) {
 		printk(KERN_ALERT "Fail to allocate buffer for fw image "
 			"size=%x!\n", (u32) fw->size);
-		goto error;
+		goto out;
 	}
 
 	memcpy(*bfi_image, fw->data, fw->size);
 	*bfi_image_size = fw->size/sizeof(u32);
-
-	return *bfi_image;
-
-error:
-	return NULL;
+out:
+	release_firmware(fw);
 }
 
-u32 *
-bfad_get_firmware_buf(struct pci_dev *pdev)
+static u32 *
+bfad_load_fwimg(struct pci_dev *pdev)
 {
 	if (pdev->device == BFA_PCI_DEVICE_ID_CT_FC) {
 		if (bfi_image_ct_fc_size == 0)
@@ -1598,6 +1612,17 @@ bfad_get_firmware_buf(struct pci_dev *pdev)
 	}
 }
 
+static void
+bfad_free_fwimg(void)
+{
+	if (bfi_image_ct_fc_size && bfi_image_ct_fc)
+		vfree(bfi_image_ct_fc);
+	if (bfi_image_ct_cna_size && bfi_image_ct_cna)
+		vfree(bfi_image_ct_cna);
+	if (bfi_image_cb_fc_size && bfi_image_cb_fc)
+		vfree(bfi_image_cb_fc);
+}
+
 module_init(bfad_init);
 module_exit(bfad_exit);
 MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c
index c66e32eced7b..48be0c54f2de 100644
--- a/drivers/scsi/bfa/bfad_debugfs.c
+++ b/drivers/scsi/bfa/bfad_debugfs.c
@@ -28,10 +28,10 @@
  * mount -t debugfs none /sys/kernel/debug
  *
  * BFA Hierarchy:
- *	- bfa/host#
- * where the host number corresponds to the one under /sys/class/scsi_host/host#
+ *	- bfa/pci_dev:<pci_name>
+ * where the pci_name corresponds to the one under /sys/bus/pci/drivers/bfa
  *
- * Debugging service available per host:
+ * Debugging service available per pci_dev:
  * fwtrc:  To collect current firmware trace.
  * drvtrc: To collect current driver trace
  * fwsave: To collect last saved fw trace as a result of firmware crash.
@@ -489,11 +489,9 @@ static atomic_t bfa_debugfs_port_count;
 inline void
 bfad_debugfs_init(struct bfad_port_s *port)
 {
-	struct bfad_im_port_s *im_port = port->im_port;
-	struct bfad_s *bfad = im_port->bfad;
-	struct Scsi_Host *shost = im_port->shost;
+	struct bfad_s *bfad = port->bfad;
 	const struct bfad_debugfs_entry *file;
-	char name[16];
+	char name[64];
 	int i;
 
 	if (!bfa_debugfs_enable)
@@ -510,17 +508,15 @@ bfad_debugfs_init(struct bfad_port_s *port)
 		}
 	}
 
-	/*
-	 * Setup the host# directory for the port,
-	 * corresponds to the scsi_host num of this port.
-	 */
-	snprintf(name, sizeof(name), "host%d", shost->host_no);
+	/* Setup the pci_dev debugfs directory for the port */
+	snprintf(name, sizeof(name), "pci_dev:%s", bfad->pci_name);
 	if (!port->port_debugfs_root) {
 		port->port_debugfs_root =
 			debugfs_create_dir(name, bfa_debugfs_root);
 		if (!port->port_debugfs_root) {
 			printk(KERN_WARNING
-				"BFA host root dir creation failed\n");
+				"bfa %s: debugfs root creation failed\n",
+				bfad->pci_name);
 			goto err;
 		}
 
@@ -536,8 +532,8 @@ bfad_debugfs_init(struct bfad_port_s *port)
 							file->fops);
 			if (!bfad->bfad_dentry_files[i]) {
 				printk(KERN_WARNING
-					"BFA host%d: create %s entry failed\n",
-					shost->host_no, file->name);
+					"bfa %s: debugfs %s creation failed\n",
+					bfad->pci_name, file->name);
 				goto err;
 			}
 		}
@@ -550,8 +546,7 @@ err:
 inline void
 bfad_debugfs_exit(struct bfad_port_s *port)
 {
-	struct bfad_im_port_s *im_port = port->im_port;
-	struct bfad_s *bfad = im_port->bfad;
+	struct bfad_s *bfad = port->bfad;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(bfad_debugfs_files); i++) {
@@ -562,9 +557,7 @@ bfad_debugfs_exit(struct bfad_port_s *port)
 	}
 
 	/*
-	 * Remove the host# directory for the port,
-	 * corresponds to the scsi_host num of this port.
-	*/
+	 * Remove the pci_dev debugfs directory for the port */
 	if (port->port_debugfs_root) {
 		debugfs_remove(port->port_debugfs_root);
 		port->port_debugfs_root = NULL;
diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h
index bfee63b16fa9..c296c8968511 100644
--- a/drivers/scsi/bfa/bfad_im.h
+++ b/drivers/scsi/bfa/bfad_im.h
@@ -141,29 +141,4 @@ extern struct device_attribute *bfad_im_vport_attrs[];
 
 irqreturn_t bfad_intx(int irq, void *dev_id);
 
-/* Firmware releated */
-#define BFAD_FW_FILE_CT_FC      "ctfw_fc.bin"
-#define BFAD_FW_FILE_CT_CNA     "ctfw_cna.bin"
-#define BFAD_FW_FILE_CB_FC      "cbfw_fc.bin"
-
-u32 *bfad_get_firmware_buf(struct pci_dev *pdev);
-u32 *bfad_read_firmware(struct pci_dev *pdev, u32 **bfi_image,
-		u32 *bfi_image_size, char *fw_name);
-
-static inline u32 *
-bfad_load_fwimg(struct pci_dev *pdev)
-{
-	return bfad_get_firmware_buf(pdev);
-}
-
-static inline void
-bfad_free_fwimg(void)
-{
-	if (bfi_image_ct_fc_size && bfi_image_ct_fc)
-		vfree(bfi_image_ct_fc);
-	if (bfi_image_ct_cna_size && bfi_image_ct_cna)
-		vfree(bfi_image_ct_cna);
-	if (bfi_image_cb_fc_size && bfi_image_cb_fc)
-		vfree(bfi_image_cb_fc);
-}
 #endif
diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h
index b6d350ac4288..0a404bfb44fe 100644
--- a/drivers/scsi/bnx2fc/bnx2fc.h
+++ b/drivers/scsi/bnx2fc/bnx2fc.h
@@ -130,7 +130,7 @@
 #define BNX2FC_TM_TIMEOUT		60	/* secs */
 #define BNX2FC_IO_TIMEOUT		20000UL	/* msecs */
 
-#define BNX2FC_WAIT_CNT			120
+#define BNX2FC_WAIT_CNT			1200
 #define BNX2FC_FW_TIMEOUT		(3 * HZ)
 #define PORT_MAX			2
 
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index cd050196a163..ab255fbc7f36 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -1133,7 +1133,7 @@ static void bnx2fc_interface_release(struct kref *kref)
 	struct net_device *phys_dev;
 
 	hba = container_of(kref, struct bnx2fc_hba, kref);
-	BNX2FC_HBA_DBG(hba->ctlr.lp, "Interface is being released\n");
+	BNX2FC_MISC_DBG("Interface is being released\n");
 
 	netdev = hba->netdev;
 	phys_dev = hba->phys_dev;
@@ -1257,20 +1257,17 @@ setup_err:
 static struct fc_lport *bnx2fc_if_create(struct bnx2fc_hba *hba,
 				  struct device *parent, int npiv)
 {
-	struct fc_lport		*lport = NULL;
+	struct fc_lport		*lport, *n_port;
 	struct fcoe_port	*port;
 	struct Scsi_Host	*shost;
 	struct fc_vport		*vport = dev_to_vport(parent);
 	int			rc = 0;
 
 	/* Allocate Scsi_Host structure */
-	if (!npiv) {
-		lport = libfc_host_alloc(&bnx2fc_shost_template,
-					  sizeof(struct fcoe_port));
-	} else {
-		lport = libfc_vport_create(vport,
-					   sizeof(struct fcoe_port));
-	}
+	if (!npiv)
+		lport = libfc_host_alloc(&bnx2fc_shost_template, sizeof(*port));
+	else
+		lport = libfc_vport_create(vport, sizeof(*port));
 
 	if (!lport) {
 		printk(KERN_ERR PFX "could not allocate scsi host structure\n");
@@ -1288,7 +1285,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_hba *hba,
 		goto lp_config_err;
 
 	if (npiv) {
-		vport = dev_to_vport(parent);
 		printk(KERN_ERR PFX "Setting vport names, 0x%llX 0x%llX\n",
 			vport->node_name, vport->port_name);
 		fc_set_wwnn(lport, vport->node_name);
@@ -1317,12 +1313,17 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_hba *hba,
 	fc_host_port_type(lport->host) = FC_PORTTYPE_UNKNOWN;
 
 	/* Allocate exchange manager */
-	if (!npiv) {
+	if (!npiv)
 		rc = bnx2fc_em_config(lport);
-		if (rc) {
-			printk(KERN_ERR PFX "Error on bnx2fc_em_config\n");
-			goto shost_err;
-		}
+	else {
+		shost = vport_to_shost(vport);
+		n_port = shost_priv(shost);
+		rc = fc_exch_mgr_list_clone(n_port, lport);
+	}
+
+	if (rc) {
+		printk(KERN_ERR PFX "Error on bnx2fc_em_config\n");
+		goto shost_err;
 	}
 
 	bnx2fc_interface_get(hba);
@@ -1355,8 +1356,6 @@ static void bnx2fc_if_destroy(struct fc_lport *lport)
 	/* Free existing transmit skbs */
 	fcoe_clean_pending_queue(lport);
 
-	bnx2fc_interface_put(hba);
-
 	/* Free queued packets for the receive thread */
 	bnx2fc_clean_rx_queue(lport);
 
@@ -1375,6 +1374,8 @@ static void bnx2fc_if_destroy(struct fc_lport *lport)
 
 	/* Release Scsi_Host */
 	scsi_host_put(lport->host);
+
+	bnx2fc_interface_put(hba);
 }
 
 /**
diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index 1b680e288c56..f756d5f85c7a 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
@@ -522,6 +522,7 @@ void bnx2fc_process_l2_frame_compl(struct bnx2fc_rport *tgt,
 	fp = fc_frame_alloc(lport, payload_len);
 	if (!fp) {
 		printk(KERN_ERR PFX "fc_frame_alloc failure\n");
+		kfree(unsol_els);
 		return;
 	}
 
@@ -547,6 +548,7 @@ void bnx2fc_process_l2_frame_compl(struct bnx2fc_rport *tgt,
 				 */
 				printk(KERN_ERR PFX "dropping ELS 0x%x\n", op);
 				kfree_skb(skb);
+				kfree(unsol_els);
 				return;
 			}
 		}
@@ -563,6 +565,7 @@ void bnx2fc_process_l2_frame_compl(struct bnx2fc_rport *tgt,
 	} else {
 		BNX2FC_HBA_DBG(lport, "fh_r_ctl = 0x%x\n", fh->fh_r_ctl);
 		kfree_skb(skb);
+		kfree(unsol_els);
 	}
 }
 
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 1decefbf32e3..b5b5c346d779 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1663,6 +1663,12 @@ int bnx2fc_queuecommand(struct Scsi_Host *host,
 	tgt = (struct bnx2fc_rport *)&rp[1];
 
 	if (!test_bit(BNX2FC_FLAG_SESSION_READY, &tgt->flags)) {
+		if (test_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags))  {
+			sc_cmd->result = DID_NO_CONNECT << 16;
+			sc_cmd->scsi_done(sc_cmd);
+			return 0;
+
+		}
 		/*
 		 * Session is not offloaded yet. Let SCSI-ml retry
 		 * the command.
diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c
index d0c82340f0e2..60d2ef291646 100644
--- a/drivers/scsi/constants.c
+++ b/drivers/scsi/constants.c
@@ -772,6 +772,7 @@ static const struct error_info additional[] =
 	{0x3802, "Esn - power management class event"},
 	{0x3804, "Esn - media class event"},
 	{0x3806, "Esn - device busy class event"},
+	{0x3807, "Thin Provisioning soft threshold reached"},
 
 	{0x3900, "Saving parameters not supported"},
 
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index b10b3841535c..f5b718d3c31b 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -778,8 +778,8 @@ static void srb_free_insert(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb)
 static void srb_waiting_insert(struct DeviceCtlBlk *dcb,
 		struct ScsiReqBlk *srb)
 {
-	dprintkdbg(DBG_0, "srb_waiting_insert: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "srb_waiting_insert: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 	list_add(&srb->list, &dcb->srb_waiting_list);
 }
 
@@ -787,16 +787,16 @@ static void srb_waiting_insert(struct DeviceCtlBlk *dcb,
 static void srb_waiting_append(struct DeviceCtlBlk *dcb,
 		struct ScsiReqBlk *srb)
 {
-	dprintkdbg(DBG_0, "srb_waiting_append: (pid#%li) <%02i-%i> srb=%p\n",
-		 srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "srb_waiting_append: (0x%p) <%02i-%i> srb=%p\n",
+		 srb->cmd, dcb->target_id, dcb->target_lun, srb);
 	list_add_tail(&srb->list, &dcb->srb_waiting_list);
 }
 
 
 static void srb_going_append(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb)
 {
-	dprintkdbg(DBG_0, "srb_going_append: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "srb_going_append: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 	list_add_tail(&srb->list, &dcb->srb_going_list);
 }
 
@@ -805,8 +805,8 @@ static void srb_going_remove(struct DeviceCtlBlk *dcb, struct ScsiReqBlk *srb)
 {
 	struct ScsiReqBlk *i;
 	struct ScsiReqBlk *tmp;
-	dprintkdbg(DBG_0, "srb_going_remove: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "srb_going_remove: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 
 	list_for_each_entry_safe(i, tmp, &dcb->srb_going_list, list)
 		if (i == srb) {
@@ -821,8 +821,8 @@ static void srb_waiting_remove(struct DeviceCtlBlk *dcb,
 {
 	struct ScsiReqBlk *i;
 	struct ScsiReqBlk *tmp;
-	dprintkdbg(DBG_0, "srb_waiting_remove: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "srb_waiting_remove: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 
 	list_for_each_entry_safe(i, tmp, &dcb->srb_waiting_list, list)
 		if (i == srb) {
@@ -836,8 +836,8 @@ static void srb_going_to_waiting_move(struct DeviceCtlBlk *dcb,
 		struct ScsiReqBlk *srb)
 {
 	dprintkdbg(DBG_0,
-		"srb_going_to_waiting_move: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+		"srb_going_to_waiting_move: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 	list_move(&srb->list, &dcb->srb_waiting_list);
 }
 
@@ -846,8 +846,8 @@ static void srb_waiting_to_going_move(struct DeviceCtlBlk *dcb,
 		struct ScsiReqBlk *srb)
 {
 	dprintkdbg(DBG_0,
-		"srb_waiting_to_going_move: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+		"srb_waiting_to_going_move: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 	list_move(&srb->list, &dcb->srb_going_list);
 }
 
@@ -982,8 +982,8 @@ static void build_srb(struct scsi_cmnd *cmd, struct DeviceCtlBlk *dcb,
 {
 	int nseg;
 	enum dma_data_direction dir = cmd->sc_data_direction;
-	dprintkdbg(DBG_0, "build_srb: (pid#%li) <%02i-%i>\n",
-		cmd->serial_number, dcb->target_id, dcb->target_lun);
+	dprintkdbg(DBG_0, "build_srb: (0x%p) <%02i-%i>\n",
+		cmd, dcb->target_id, dcb->target_lun);
 
 	srb->dcb = dcb;
 	srb->cmd = cmd;
@@ -1086,8 +1086,8 @@ static int dc395x_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct s
 	struct ScsiReqBlk *srb;
 	struct AdapterCtlBlk *acb =
 	    (struct AdapterCtlBlk *)cmd->device->host->hostdata;
-	dprintkdbg(DBG_0, "queue_command: (pid#%li) <%02i-%i> cmnd=0x%02x\n",
-		cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+	dprintkdbg(DBG_0, "queue_command: (0x%p) <%02i-%i> cmnd=0x%02x\n",
+		cmd, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 
 	/* Assume BAD_TARGET; will be cleared later */
 	cmd->result = DID_BAD_TARGET << 16;
@@ -1140,7 +1140,7 @@ static int dc395x_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct s
 		/* process immediately */
 		send_srb(acb, srb);
 	}
-	dprintkdbg(DBG_1, "queue_command: (pid#%li) done\n", cmd->serial_number);
+	dprintkdbg(DBG_1, "queue_command: (0x%p) done\n", cmd);
 	return 0;
 
 complete:
@@ -1203,9 +1203,9 @@ static void dump_register_info(struct AdapterCtlBlk *acb,
 			dprintkl(KERN_INFO, "dump: srb=%p cmd=%p OOOPS!\n",
 				srb, srb->cmd);
 		else
-			dprintkl(KERN_INFO, "dump: srb=%p cmd=%p (pid#%li) "
+			dprintkl(KERN_INFO, "dump: srb=%p cmd=%p "
 				 "cmnd=0x%02x <%02i-%i>\n",
-				srb, srb->cmd, srb->cmd->serial_number,
+				srb, srb->cmd,
 				srb->cmd->cmnd[0], srb->cmd->device->id,
 			       	srb->cmd->device->lun);
 		printk("  sglist=%p cnt=%i idx=%i len=%zu\n",
@@ -1301,8 +1301,8 @@ static int __dc395x_eh_bus_reset(struct scsi_cmnd *cmd)
 	struct AdapterCtlBlk *acb =
 		(struct AdapterCtlBlk *)cmd->device->host->hostdata;
 	dprintkl(KERN_INFO,
-		"eh_bus_reset: (pid#%li) target=<%02i-%i> cmd=%p\n",
-		cmd->serial_number, cmd->device->id, cmd->device->lun, cmd);
+		"eh_bus_reset: (0x%p) target=<%02i-%i> cmd=%p\n",
+		cmd, cmd->device->id, cmd->device->lun, cmd);
 
 	if (timer_pending(&acb->waiting_timer))
 		del_timer(&acb->waiting_timer);
@@ -1368,8 +1368,8 @@ static int dc395x_eh_abort(struct scsi_cmnd *cmd)
 	    (struct AdapterCtlBlk *)cmd->device->host->hostdata;
 	struct DeviceCtlBlk *dcb;
 	struct ScsiReqBlk *srb;
-	dprintkl(KERN_INFO, "eh_abort: (pid#%li) target=<%02i-%i> cmd=%p\n",
-		cmd->serial_number, cmd->device->id, cmd->device->lun, cmd);
+	dprintkl(KERN_INFO, "eh_abort: (0x%p) target=<%02i-%i> cmd=%p\n",
+		cmd, cmd->device->id, cmd->device->lun, cmd);
 
 	dcb = find_dcb(acb, cmd->device->id, cmd->device->lun);
 	if (!dcb) {
@@ -1495,8 +1495,8 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 	u16 s_stat2, return_code;
 	u8 s_stat, scsicommand, i, identify_message;
 	u8 *ptr;
-	dprintkdbg(DBG_0, "start_scsi: (pid#%li) <%02i-%i> srb=%p\n",
-		srb->cmd->serial_number, dcb->target_id, dcb->target_lun, srb);
+	dprintkdbg(DBG_0, "start_scsi: (0x%p) <%02i-%i> srb=%p\n",
+		srb->cmd, dcb->target_id, dcb->target_lun, srb);
 
 	srb->tag_number = TAG_NONE;	/* acb->tag_max_num: had error read in eeprom */
 
@@ -1505,8 +1505,8 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 	s_stat2 = DC395x_read16(acb, TRM_S1040_SCSI_STATUS);
 #if 1
 	if (s_stat & 0x20 /* s_stat2 & 0x02000 */ ) {
-		dprintkdbg(DBG_KG, "start_scsi: (pid#%li) BUSY %02x %04x\n",
-			srb->cmd->serial_number, s_stat, s_stat2);
+		dprintkdbg(DBG_KG, "start_scsi: (0x%p) BUSY %02x %04x\n",
+			srb->cmd, s_stat, s_stat2);
 		/*
 		 * Try anyway?
 		 *
@@ -1522,16 +1522,15 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 	}
 #endif
 	if (acb->active_dcb) {
-		dprintkl(KERN_DEBUG, "start_scsi: (pid#%li) Attempt to start a"
-			"command while another command (pid#%li) is active.",
-			srb->cmd->serial_number,
+		dprintkl(KERN_DEBUG, "start_scsi: (0x%p) Attempt to start a "
+			"command while another command (0x%p) is active.",
+			srb->cmd,
 			acb->active_dcb->active_srb ?
-			    acb->active_dcb->active_srb->cmd->serial_number : 0);
+			    acb->active_dcb->active_srb->cmd : 0);
 		return 1;
 	}
 	if (DC395x_read16(acb, TRM_S1040_SCSI_STATUS) & SCSIINTERRUPT) {
-		dprintkdbg(DBG_KG, "start_scsi: (pid#%li) Failed (busy)\n",
-			srb->cmd->serial_number);
+		dprintkdbg(DBG_KG, "start_scsi: (0x%p) Failed (busy)\n", srb->cmd);
 		return 1;
 	}
 	/* Allow starting of SCSI commands half a second before we allow the mid-level
@@ -1603,9 +1602,9 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 			tag_number++;
 		}
 		if (tag_number >= dcb->max_command) {
-			dprintkl(KERN_WARNING, "start_scsi: (pid#%li) "
+			dprintkl(KERN_WARNING, "start_scsi: (0x%p) "
 				"Out of tags target=<%02i-%i>)\n",
-				srb->cmd->serial_number, srb->cmd->device->id,
+				srb->cmd, srb->cmd->device->id,
 				srb->cmd->device->lun);
 			srb->state = SRB_READY;
 			DC395x_write16(acb, TRM_S1040_SCSI_CONTROL,
@@ -1623,8 +1622,8 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 #endif
 /*polling:*/
 	/* Send CDB ..command block ......... */
-	dprintkdbg(DBG_KG, "start_scsi: (pid#%li) <%02i-%i> cmnd=0x%02x tag=%i\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun,
+	dprintkdbg(DBG_KG, "start_scsi: (0x%p) <%02i-%i> cmnd=0x%02x tag=%i\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun,
 		srb->cmd->cmnd[0], srb->tag_number);
 	if (srb->flag & AUTO_REQSENSE) {
 		DC395x_write8(acb, TRM_S1040_SCSI_FIFO, REQUEST_SENSE);
@@ -1647,8 +1646,8 @@ static u8 start_scsi(struct AdapterCtlBlk* acb, struct DeviceCtlBlk* dcb,
 		 * we caught an interrupt (must be reset or reselection ... )
 		 * : Let's process it first!
 		 */
-		dprintkdbg(DBG_0, "start_scsi: (pid#%li) <%02i-%i> Failed - busy\n",
-			srb->cmd->serial_number, dcb->target_id, dcb->target_lun);
+		dprintkdbg(DBG_0, "start_scsi: (0x%p) <%02i-%i> Failed - busy\n",
+			srb->cmd, dcb->target_id, dcb->target_lun);
 		srb->state = SRB_READY;
 		free_tag(dcb, srb);
 		srb->msg_count = 0;
@@ -1843,7 +1842,7 @@ static irqreturn_t dc395x_interrupt(int irq, void *dev_id)
 static void msgout_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "msgout_phase0: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "msgout_phase0: (0x%p)\n", srb->cmd);
 	if (srb->state & (SRB_UNEXPECT_RESEL + SRB_ABORT_SENT))
 		*pscsi_status = PH_BUS_FREE;	/*.. initial phase */
 
@@ -1857,18 +1856,18 @@ static void msgout_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 {
 	u16 i;
 	u8 *ptr;
-	dprintkdbg(DBG_0, "msgout_phase1: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "msgout_phase1: (0x%p)\n", srb->cmd);
 
 	clear_fifo(acb, "msgout_phase1");
 	if (!(srb->state & SRB_MSGOUT)) {
 		srb->state |= SRB_MSGOUT;
 		dprintkl(KERN_DEBUG,
-			"msgout_phase1: (pid#%li) Phase unexpected\n",
-			srb->cmd->serial_number);	/* So what ? */
+			"msgout_phase1: (0x%p) Phase unexpected\n",
+			srb->cmd);	/* So what ? */
 	}
 	if (!srb->msg_count) {
-		dprintkdbg(DBG_0, "msgout_phase1: (pid#%li) NOP msg\n",
-			srb->cmd->serial_number);
+		dprintkdbg(DBG_0, "msgout_phase1: (0x%p) NOP msg\n",
+			srb->cmd);
 		DC395x_write8(acb, TRM_S1040_SCSI_FIFO, MSG_NOP);
 		DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important for atn stop */
 		DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_FIFO_OUT);
@@ -1888,7 +1887,7 @@ static void msgout_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 static void command_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "command_phase0: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "command_phase0: (0x%p)\n", srb->cmd);
 	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);
 }
 
@@ -1899,7 +1898,7 @@ static void command_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 	struct DeviceCtlBlk *dcb;
 	u8 *ptr;
 	u16 i;
-	dprintkdbg(DBG_0, "command_phase1: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "command_phase1: (0x%p)\n", srb->cmd);
 
 	clear_fifo(acb, "command_phase1");
 	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_CLRATN);
@@ -2041,8 +2040,8 @@ static void data_out_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 	struct DeviceCtlBlk *dcb = srb->dcb;
 	u16 scsi_status = *pscsi_status;
 	u32 d_left_counter = 0;
-	dprintkdbg(DBG_0, "data_out_phase0: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "data_out_phase0: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 
 	/*
 	 * KG: We need to drain the buffers before we draw any conclusions!
@@ -2171,8 +2170,8 @@ static void data_out_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 static void data_out_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "data_out_phase1: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "data_out_phase1: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 	clear_fifo(acb, "data_out_phase1");
 	/* do prepare before transfer when data out phase */
 	data_io_transfer(acb, srb, XFERDATAOUT);
@@ -2183,8 +2182,8 @@ static void data_in_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 {
 	u16 scsi_status = *pscsi_status;
 
-	dprintkdbg(DBG_0, "data_in_phase0: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "data_in_phase0: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 
 	/*
 	 * KG: DataIn is much more tricky than DataOut. When the device is finished
@@ -2204,8 +2203,8 @@ static void data_in_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		unsigned int sc, fc;
 
 		if (scsi_status & PARITYERROR) {
-			dprintkl(KERN_INFO, "data_in_phase0: (pid#%li) "
-				"Parity Error\n", srb->cmd->serial_number);
+			dprintkl(KERN_INFO, "data_in_phase0: (0x%p) "
+				"Parity Error\n", srb->cmd);
 			srb->status |= PARITY_ERROR;
 		}
 		/*
@@ -2394,8 +2393,8 @@ static void data_in_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 static void data_in_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "data_in_phase1: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "data_in_phase1: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 	data_io_transfer(acb, srb, XFERDATAIN);
 }
 
@@ -2406,8 +2405,8 @@ static void data_io_transfer(struct AdapterCtlBlk *acb,
 	struct DeviceCtlBlk *dcb = srb->dcb;
 	u8 bval;
 	dprintkdbg(DBG_0,
-		"data_io_transfer: (pid#%li) <%02i-%i> %c len=%i, sg=(%i/%i)\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun,
+		"data_io_transfer: (0x%p) <%02i-%i> %c len=%i, sg=(%i/%i)\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun,
 		((io_dir & DMACMD_DIR) ? 'r' : 'w'),
 		srb->total_xfer_length, srb->sg_index, srb->sg_count);
 	if (srb == acb->tmp_srb)
@@ -2579,8 +2578,8 @@ static void data_io_transfer(struct AdapterCtlBlk *acb,
 static void status_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "status_phase0: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "status_phase0: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 	srb->target_status = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
 	srb->end_message = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);	/* get message */
 	srb->state = SRB_COMPLETED;
@@ -2593,8 +2592,8 @@ static void status_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 static void status_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "status_phase1: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->cmd->device->id, srb->cmd->device->lun);
+	dprintkdbg(DBG_0, "status_phase1: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->cmd->device->id, srb->cmd->device->lun);
 	srb->state = SRB_STATUS;
 	DC395x_write16(acb, TRM_S1040_SCSI_CONTROL, DO_DATALATCH);	/* it's important for atn stop */
 	DC395x_write8(acb, TRM_S1040_SCSI_COMMAND, SCMD_COMP);
@@ -2635,8 +2634,8 @@ static struct ScsiReqBlk *msgin_qtag(struct AdapterCtlBlk *acb,
 {
 	struct ScsiReqBlk *srb = NULL;
 	struct ScsiReqBlk *i;
-	dprintkdbg(DBG_0, "msgin_qtag: (pid#%li) tag=%i srb=%p\n",
-		   srb->cmd->serial_number, tag, srb);
+	dprintkdbg(DBG_0, "msgin_qtag: <%02i-%i> tag=%i\n",
+		   dcb->target_id, dcb->target_lun, tag);
 
 	if (!(dcb->tag_mask & (1 << tag)))
 		dprintkl(KERN_DEBUG,
@@ -2654,8 +2653,8 @@ static struct ScsiReqBlk *msgin_qtag(struct AdapterCtlBlk *acb,
 	if (!srb)
 		goto mingx0;
 
-	dprintkdbg(DBG_0, "msgin_qtag: (pid#%li) <%02i-%i>\n",
-		srb->cmd->serial_number, srb->dcb->target_id, srb->dcb->target_lun);
+	dprintkdbg(DBG_0, "msgin_qtag: (0x%p) <%02i-%i>\n",
+		srb->cmd, srb->dcb->target_id, srb->dcb->target_lun);
 	if (dcb->flag & ABORT_DEV_) {
 		/*srb->state = SRB_ABORT_SENT; */
 		enable_msgout_abort(acb, srb);
@@ -2865,7 +2864,7 @@ static void msgin_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
 	struct DeviceCtlBlk *dcb = acb->active_dcb;
-	dprintkdbg(DBG_0, "msgin_phase0: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "msgin_phase0: (0x%p)\n", srb->cmd);
 
 	srb->msgin_buf[acb->msg_len++] = DC395x_read8(acb, TRM_S1040_SCSI_FIFO);
 	if (msgin_completed(srb->msgin_buf, acb->msg_len)) {
@@ -2931,9 +2930,9 @@ static void msgin_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 			 * SAVE POINTER may be ignored as we have the struct
 			 * ScsiReqBlk* associated with the scsi command.
 			 */
-			dprintkdbg(DBG_0, "msgin_phase0: (pid#%li) "
+			dprintkdbg(DBG_0, "msgin_phase0: (0x%p) "
 				"SAVE POINTER rem=%i Ignore\n",
-				srb->cmd->serial_number, srb->total_xfer_length);
+				srb->cmd, srb->total_xfer_length);
 			break;
 
 		case RESTORE_POINTERS:
@@ -2941,9 +2940,9 @@ static void msgin_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 			break;
 
 		case ABORT:
-			dprintkdbg(DBG_0, "msgin_phase0: (pid#%li) "
+			dprintkdbg(DBG_0, "msgin_phase0: (0x%p) "
 				"<%02i-%i> ABORT msg\n",
-				srb->cmd->serial_number, dcb->target_id,
+				srb->cmd, dcb->target_id,
 				dcb->target_lun);
 			dcb->flag |= ABORT_DEV_;
 			enable_msgout_abort(acb, srb);
@@ -2975,7 +2974,7 @@ static void msgin_phase0(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 static void msgin_phase1(struct AdapterCtlBlk *acb, struct ScsiReqBlk *srb,
 		u16 *pscsi_status)
 {
-	dprintkdbg(DBG_0, "msgin_phase1: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "msgin_phase1: (0x%p)\n", srb->cmd);
 	clear_fifo(acb, "msgin_phase1");
 	DC395x_write32(acb, TRM_S1040_SCSI_COUNTER, 1);
 	if (!(srb->state & SRB_MSGIN)) {
@@ -3041,7 +3040,7 @@ static void disconnect(struct AdapterCtlBlk *acb)
 	}
 	srb = dcb->active_srb;
 	acb->active_dcb = NULL;
-	dprintkdbg(DBG_0, "disconnect: (pid#%li)\n", srb->cmd->serial_number);
+	dprintkdbg(DBG_0, "disconnect: (0x%p)\n", srb->cmd);
 
 	srb->scsi_phase = PH_BUS_FREE;	/* initial phase */
 	clear_fifo(acb, "disconnect");
@@ -3071,14 +3070,14 @@ static void disconnect(struct AdapterCtlBlk *acb)
 			    && srb->state != SRB_MSGOUT) {
 				srb->state = SRB_READY;
 				dprintkl(KERN_DEBUG,
-					"disconnect: (pid#%li) Unexpected\n",
-					srb->cmd->serial_number);
+					"disconnect: (0x%p) Unexpected\n",
+					srb->cmd);
 				srb->target_status = SCSI_STAT_SEL_TIMEOUT;
 				goto disc1;
 			} else {
 				/* Normal selection timeout */
-				dprintkdbg(DBG_KG, "disconnect: (pid#%li) "
-					"<%02i-%i> SelTO\n", srb->cmd->serial_number,
+				dprintkdbg(DBG_KG, "disconnect: (0x%p) "
+					"<%02i-%i> SelTO\n", srb->cmd,
 					dcb->target_id, dcb->target_lun);
 				if (srb->retry_count++ > DC395x_MAX_RETRIES
 				    || acb->scan_devices) {
@@ -3089,8 +3088,8 @@ static void disconnect(struct AdapterCtlBlk *acb)
 				free_tag(dcb, srb);
 				srb_going_to_waiting_move(dcb, srb);
 				dprintkdbg(DBG_KG,
-					"disconnect: (pid#%li) Retry\n",
-					srb->cmd->serial_number);
+					"disconnect: (0x%p) Retry\n",
+					srb->cmd);
 				waiting_set_timer(acb, HZ / 20);
 			}
 		} else if (srb->state & SRB_DISCONNECT) {
@@ -3142,9 +3141,9 @@ static void reselect(struct AdapterCtlBlk *acb)
 		}
 		/* Why the if ? */
 		if (!acb->scan_devices) {
-			dprintkdbg(DBG_KG, "reselect: (pid#%li) <%02i-%i> "
+			dprintkdbg(DBG_KG, "reselect: (0x%p) <%02i-%i> "
 				"Arb lost but Resel win rsel=%i stat=0x%04x\n",
-				srb->cmd->serial_number, dcb->target_id,
+				srb->cmd, dcb->target_id,
 				dcb->target_lun, rsel_tar_lun_id,
 				DC395x_read16(acb, TRM_S1040_SCSI_STATUS));
 			arblostflag = 1;
@@ -3318,7 +3317,7 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
 	enum dma_data_direction dir = cmd->sc_data_direction;
 	int ckc_only = 1;
 
-	dprintkdbg(DBG_1, "srb_done: (pid#%li) <%02i-%i>\n", srb->cmd->serial_number,
+	dprintkdbg(DBG_1, "srb_done: (0x%p) <%02i-%i>\n", srb->cmd,
 		srb->cmd->device->id, srb->cmd->device->lun);
 	dprintkdbg(DBG_SG, "srb_done: srb=%p sg=%i(%i/%i) buf=%p\n",
 		   srb, scsi_sg_count(cmd), srb->sg_index, srb->sg_count,
@@ -3497,9 +3496,9 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
 	cmd->SCp.buffers_residual = 0;
 	if (debug_enabled(DBG_KG)) {
 		if (srb->total_xfer_length)
-			dprintkdbg(DBG_KG, "srb_done: (pid#%li) <%02i-%i> "
+			dprintkdbg(DBG_KG, "srb_done: (0x%p) <%02i-%i> "
 				"cmnd=0x%02x Missed %i bytes\n",
-				cmd->serial_number, cmd->device->id, cmd->device->lun,
+				cmd, cmd->device->id, cmd->device->lun,
 				cmd->cmnd[0], srb->total_xfer_length);
 	}
 
@@ -3508,8 +3507,8 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
 	if (srb == acb->tmp_srb)
 		dprintkl(KERN_ERR, "srb_done: ERROR! Completed cmd with tmp_srb\n");
 	else {
-		dprintkdbg(DBG_0, "srb_done: (pid#%li) done result=0x%08x\n",
-			cmd->serial_number, cmd->result);
+		dprintkdbg(DBG_0, "srb_done: (0x%p) done result=0x%08x\n",
+			cmd, cmd->result);
 		srb_free_insert(acb, srb);
 	}
 	pci_unmap_srb(acb, srb);
@@ -3538,7 +3537,7 @@ static void doing_srb_done(struct AdapterCtlBlk *acb, u8 did_flag,
 			p = srb->cmd;
 			dir = p->sc_data_direction;
 			result = MK_RES(0, did_flag, 0, 0);
-			printk("G:%li(%02i-%i) ", p->serial_number,
+			printk("G:%p(%02i-%i) ", p,
 			       p->device->id, p->device->lun);
 			srb_going_remove(dcb, srb);
 			free_tag(dcb, srb);
@@ -3568,7 +3567,7 @@ static void doing_srb_done(struct AdapterCtlBlk *acb, u8 did_flag,
 			p = srb->cmd;
 
 			result = MK_RES(0, did_flag, 0, 0);
-			printk("W:%li<%02i-%i>", p->serial_number, p->device->id,
+			printk("W:%p<%02i-%i>", p, p->device->id,
 			       p->device->lun);
 			srb_waiting_remove(dcb, srb);
 			srb_free_insert(acb, srb);
@@ -3677,8 +3676,8 @@ static void request_sense(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
 		struct ScsiReqBlk *srb)
 {
 	struct scsi_cmnd *cmd = srb->cmd;
-	dprintkdbg(DBG_1, "request_sense: (pid#%li) <%02i-%i>\n",
-		cmd->serial_number, cmd->device->id, cmd->device->lun);
+	dprintkdbg(DBG_1, "request_sense: (0x%p) <%02i-%i>\n",
+		cmd, cmd->device->id, cmd->device->lun);
 
 	srb->flag |= AUTO_REQSENSE;
 	srb->adapter_status = 0;
@@ -3708,8 +3707,8 @@ static void request_sense(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb,
 
 	if (start_scsi(acb, dcb, srb)) {	/* Should only happen, if sb. else grabs the bus */
 		dprintkl(KERN_DEBUG,
-			"request_sense: (pid#%li) failed <%02i-%i>\n",
-			srb->cmd->serial_number, dcb->target_id, dcb->target_lun);
+			"request_sense: (0x%p) failed <%02i-%i>\n",
+			srb->cmd, dcb->target_id, dcb->target_lun);
 		srb_going_to_waiting_move(dcb, srb);
 		waiting_set_timer(acb, HZ / 100);
 	}
@@ -4717,13 +4716,13 @@ static int dc395x_proc_info(struct Scsi_Host *host, char *buffer,
 				dcb->target_id, dcb->target_lun,
 				list_size(&dcb->srb_waiting_list));
                 list_for_each_entry(srb, &dcb->srb_waiting_list, list)
-			SPRINTF(" %li", srb->cmd->serial_number);
+			SPRINTF(" %p", srb->cmd);
 		if (!list_empty(&dcb->srb_going_list))
 			SPRINTF("\nDCB (%02i-%i): Going  : %i:",
 				dcb->target_id, dcb->target_lun,
 				list_size(&dcb->srb_going_list));
 		list_for_each_entry(srb, &dcb->srb_going_list, list)
-			SPRINTF(" %li", srb->cmd->serial_number);
+			SPRINTF(" %p", srb->cmd);
 		if (!list_empty(&dcb->srb_waiting_list) || !list_empty(&dcb->srb_going_list))
 			SPRINTF("\n");
 	}
diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c
index 564e6ecd17c2..0119b8147797 100644
--- a/drivers/scsi/device_handler/scsi_dh.c
+++ b/drivers/scsi/device_handler/scsi_dh.c
@@ -394,12 +394,14 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
 	unsigned long flags;
 	struct scsi_device *sdev;
 	struct scsi_device_handler *scsi_dh = NULL;
+	struct device *dev = NULL;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	sdev = q->queuedata;
 	if (sdev && sdev->scsi_dh_data)
 		scsi_dh = sdev->scsi_dh_data->scsi_dh;
-	if (!scsi_dh || !get_device(&sdev->sdev_gendev) ||
+	dev = sdev ? get_device(&sdev->sdev_gendev) : NULL;
+	if (!scsi_dh || !dev ||
 	    sdev->sdev_state == SDEV_CANCEL ||
 	    sdev->sdev_state == SDEV_DEL)
 		err = SCSI_DH_NOSYS;
@@ -410,12 +412,13 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
 	if (err) {
 		if (fn)
 			fn(data, err);
-		return err;
+		goto out;
 	}
 
 	if (scsi_dh->activate)
 		err = scsi_dh->activate(sdev, fn, data);
-	put_device(&sdev->sdev_gendev);
+out:
+	put_device(dev);
 	return err;
 }
 EXPORT_SYMBOL_GPL(scsi_dh_activate);
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 42fe52902add..6fec9fe5dc39 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -782,7 +782,7 @@ static int alua_bus_attach(struct scsi_device *sdev)
 	h->sdev = sdev;
 
 	err = alua_initialize(sdev, h);
-	if (err != SCSI_DH_OK)
+	if ((err != SCSI_DH_OK) && (err != SCSI_DH_DEV_OFFLINED))
 		goto failed;
 
 	if (!try_module_get(THIS_MODULE))
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 293c183dfe6d..e7fc70d6b478 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -182,14 +182,24 @@ struct rdac_dh_data {
 	struct rdac_controller	*ctlr;
 #define UNINITIALIZED_LUN	(1 << 8)
 	unsigned		lun;
+
+#define RDAC_MODE		0
+#define RDAC_MODE_AVT		1
+#define RDAC_MODE_IOSHIP	2
+	unsigned char		mode;
+
 #define RDAC_STATE_ACTIVE	0
 #define RDAC_STATE_PASSIVE	1
 	unsigned char		state;
 
 #define RDAC_LUN_UNOWNED	0
 #define RDAC_LUN_OWNED		1
-#define RDAC_LUN_AVT		2
 	char			lun_state;
+
+#define RDAC_PREFERRED		0
+#define RDAC_NON_PREFERRED	1
+	char			preferred;
+
 	unsigned char		sense[SCSI_SENSE_BUFFERSIZE];
 	union			{
 		struct c2_inquiry c2;
@@ -199,11 +209,15 @@ struct rdac_dh_data {
 	} inq;
 };
 
+static const char *mode[] = {
+	"RDAC",
+	"AVT",
+	"IOSHIP",
+};
 static const char *lun_state[] =
 {
 	"unowned",
 	"owned",
-	"owned (AVT mode)",
 };
 
 struct rdac_queue_data {
@@ -458,25 +472,33 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
 	int err;
 	struct c9_inquiry *inqp;
 
-	h->lun_state = RDAC_LUN_UNOWNED;
 	h->state = RDAC_STATE_ACTIVE;
 	err = submit_inquiry(sdev, 0xC9, sizeof(struct c9_inquiry), h);
 	if (err == SCSI_DH_OK) {
 		inqp = &h->inq.c9;
-		if ((inqp->avte_cvp >> 7) == 0x1) {
-			/* LUN in AVT mode */
-			sdev_printk(KERN_NOTICE, sdev,
-				    "%s: AVT mode detected\n",
-				    RDAC_NAME);
-			h->lun_state = RDAC_LUN_AVT;
-		} else if ((inqp->avte_cvp & 0x1) != 0) {
-			/* LUN was owned by the controller */
+		/* detect the operating mode */
+		if ((inqp->avte_cvp >> 5) & 0x1)
+			h->mode = RDAC_MODE_IOSHIP; /* LUN in IOSHIP mode */
+		else if (inqp->avte_cvp >> 7)
+			h->mode = RDAC_MODE_AVT; /* LUN in AVT mode */
+		else
+			h->mode = RDAC_MODE; /* LUN in RDAC mode */
+
+		/* Update ownership */
+		if (inqp->avte_cvp & 0x1)
 			h->lun_state = RDAC_LUN_OWNED;
+		else {
+			h->lun_state = RDAC_LUN_UNOWNED;
+			if (h->mode == RDAC_MODE)
+				h->state = RDAC_STATE_PASSIVE;
 		}
-	}
 
-	if (h->lun_state == RDAC_LUN_UNOWNED)
-		h->state = RDAC_STATE_PASSIVE;
+		/* Update path prio*/
+		if (inqp->path_prio & 0x1)
+			h->preferred = RDAC_PREFERRED;
+		else
+			h->preferred = RDAC_NON_PREFERRED;
+	}
 
 	return err;
 }
@@ -648,12 +670,27 @@ static int rdac_activate(struct scsi_device *sdev,
 {
 	struct rdac_dh_data *h = get_rdac_data(sdev);
 	int err = SCSI_DH_OK;
+	int act = 0;
 
 	err = check_ownership(sdev, h);
 	if (err != SCSI_DH_OK)
 		goto done;
 
-	if (h->lun_state == RDAC_LUN_UNOWNED) {
+	switch (h->mode) {
+	case RDAC_MODE:
+		if (h->lun_state == RDAC_LUN_UNOWNED)
+			act = 1;
+		break;
+	case RDAC_MODE_IOSHIP:
+		if ((h->lun_state == RDAC_LUN_UNOWNED) &&
+		    (h->preferred == RDAC_PREFERRED))
+			act = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (act) {
 		err = queue_mode_select(sdev, fn, data);
 		if (err == SCSI_DH_OK)
 			return 0;
@@ -836,8 +873,9 @@ static int rdac_bus_attach(struct scsi_device *sdev)
 	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
 
 	sdev_printk(KERN_NOTICE, sdev,
-		    "%s: LUN %d (%s)\n",
-		    RDAC_NAME, h->lun, lun_state[(int)h->lun_state]);
+		    "%s: LUN %d (%s) (%s)\n",
+		    RDAC_NAME, h->lun, mode[(int)h->mode],
+		    lun_state[(int)h->lun_state]);
 
 	return 0;
 
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index cffcb108ac96..b4f6c9a84e71 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -780,7 +780,7 @@ static int adpt_abort(struct scsi_cmnd * cmd)
 		return FAILED;
 	}
 	pHba = (adpt_hba*) cmd->device->host->hostdata[0];
-	printk(KERN_INFO"%s: Trying to Abort cmd=%ld\n",pHba->name, cmd->serial_number);
+	printk(KERN_INFO"%s: Trying to Abort\n",pHba->name);
 	if ((dptdevice = (void*) (cmd->device->hostdata)) == NULL) {
 		printk(KERN_ERR "%s: Unable to abort: No device in cmnd\n",pHba->name);
 		return FAILED;
@@ -802,10 +802,10 @@ static int adpt_abort(struct scsi_cmnd * cmd)
 			printk(KERN_INFO"%s: Abort cmd not supported\n",pHba->name);
 			return FAILED;
 		}
-		printk(KERN_INFO"%s: Abort cmd=%ld failed.\n",pHba->name, cmd->serial_number);
+		printk(KERN_INFO"%s: Abort failed.\n",pHba->name);
 		return FAILED;
 	} 
-	printk(KERN_INFO"%s: Abort cmd=%ld complete.\n",pHba->name, cmd->serial_number);
+	printk(KERN_INFO"%s: Abort complete.\n",pHba->name);
 	return SUCCESS;
 }
 
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index 0eb4fe6a4c8a..94de88955a99 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1766,8 +1766,8 @@ static int eata2x_queuecommand_lck(struct scsi_cmnd *SCpnt,
 	struct mscp *cpp;
 
 	if (SCpnt->host_scribble)
-		panic("%s: qcomm, pid %ld, SCpnt %p already active.\n",
-		      ha->board_name, SCpnt->serial_number, SCpnt);
+		panic("%s: qcomm, SCpnt %p already active.\n",
+		      ha->board_name, SCpnt);
 
 	/* i is the mailbox number, look for the first free mailbox
 	   starting from last_cp_used */
@@ -1801,7 +1801,7 @@ static int eata2x_queuecommand_lck(struct scsi_cmnd *SCpnt,
 
 	if (do_trace)
 		scmd_printk(KERN_INFO, SCpnt,
-			"qcomm, mbox %d, pid %ld.\n", i, SCpnt->serial_number);
+			"qcomm, mbox %d.\n", i);
 
 	cpp->reqsen = 1;
 	cpp->dispri = 1;
@@ -1833,8 +1833,7 @@ static int eata2x_queuecommand_lck(struct scsi_cmnd *SCpnt,
 	if (do_dma(shost->io_port, cpp->cp_dma_addr, SEND_CP_DMA)) {
 		unmap_dma(i, ha);
 		SCpnt->host_scribble = NULL;
-		scmd_printk(KERN_INFO, SCpnt,
-			"qcomm, pid %ld, adapter busy.\n", SCpnt->serial_number);
+		scmd_printk(KERN_INFO, SCpnt, "qcomm, adapter busy.\n");
 		return 1;
 	}
 
@@ -1851,14 +1850,12 @@ static int eata2x_eh_abort(struct scsi_cmnd *SCarg)
 	unsigned int i;
 
 	if (SCarg->host_scribble == NULL) {
-		scmd_printk(KERN_INFO, SCarg,
-			"abort, pid %ld inactive.\n", SCarg->serial_number);
+		scmd_printk(KERN_INFO, SCarg, "abort, cmd inactive.\n");
 		return SUCCESS;
 	}
 
 	i = *(unsigned int *)SCarg->host_scribble;
-	scmd_printk(KERN_WARNING, SCarg,
-		"abort, mbox %d, pid %ld.\n", i, SCarg->serial_number);
+	scmd_printk(KERN_WARNING, SCarg, "abort, mbox %d.\n", i);
 
 	if (i >= shost->can_queue)
 		panic("%s: abort, invalid SCarg->host_scribble.\n", ha->board_name);
@@ -1902,8 +1899,8 @@ static int eata2x_eh_abort(struct scsi_cmnd *SCarg)
 		SCarg->result = DID_ABORT << 16;
 		SCarg->host_scribble = NULL;
 		ha->cp_stat[i] = FREE;
-		printk("%s, abort, mbox %d ready, DID_ABORT, pid %ld done.\n",
-		       ha->board_name, i, SCarg->serial_number);
+		printk("%s, abort, mbox %d ready, DID_ABORT, done.\n",
+		       ha->board_name, i);
 		SCarg->scsi_done(SCarg);
 		return SUCCESS;
 	}
@@ -1919,13 +1916,12 @@ static int eata2x_eh_host_reset(struct scsi_cmnd *SCarg)
 	struct Scsi_Host *shost = SCarg->device->host;
 	struct hostdata *ha = (struct hostdata *)shost->hostdata;
 
-	scmd_printk(KERN_INFO, SCarg,
-		"reset, enter, pid %ld.\n", SCarg->serial_number);
+	scmd_printk(KERN_INFO, SCarg, "reset, enter.\n");
 
 	spin_lock_irq(shost->host_lock);
 
 	if (SCarg->host_scribble == NULL)
-		printk("%s: reset, pid %ld inactive.\n", ha->board_name, SCarg->serial_number);
+		printk("%s: reset, inactive.\n", ha->board_name);
 
 	if (ha->in_reset) {
 		printk("%s: reset, exit, already in reset.\n", ha->board_name);
@@ -1964,14 +1960,14 @@ static int eata2x_eh_host_reset(struct scsi_cmnd *SCarg)
 
 		if (ha->cp_stat[i] == READY || ha->cp_stat[i] == ABORTING) {
 			ha->cp_stat[i] = ABORTING;
-			printk("%s: reset, mbox %d aborting, pid %ld.\n",
-			       ha->board_name, i, SCpnt->serial_number);
+			printk("%s: reset, mbox %d aborting.\n",
+			       ha->board_name, i);
 		}
 
 		else {
 			ha->cp_stat[i] = IN_RESET;
-			printk("%s: reset, mbox %d in reset, pid %ld.\n",
-			       ha->board_name, i, SCpnt->serial_number);
+			printk("%s: reset, mbox %d in reset.\n",
+			       ha->board_name, i);
 		}
 
 		if (SCpnt->host_scribble == NULL)
@@ -2025,8 +2021,8 @@ static int eata2x_eh_host_reset(struct scsi_cmnd *SCarg)
 			ha->cp_stat[i] = LOCKED;
 
 			printk
-			    ("%s, reset, mbox %d locked, DID_RESET, pid %ld done.\n",
-			     ha->board_name, i, SCpnt->serial_number);
+			    ("%s, reset, mbox %d locked, DID_RESET, done.\n",
+			     ha->board_name, i);
 		}
 
 		else if (ha->cp_stat[i] == ABORTING) {
@@ -2039,8 +2035,8 @@ static int eata2x_eh_host_reset(struct scsi_cmnd *SCarg)
 			ha->cp_stat[i] = FREE;
 
 			printk
-			    ("%s, reset, mbox %d aborting, DID_RESET, pid %ld done.\n",
-			     ha->board_name, i, SCpnt->serial_number);
+			    ("%s, reset, mbox %d aborting, DID_RESET, done.\n",
+			     ha->board_name, i);
 		}
 
 		else
@@ -2054,7 +2050,7 @@ static int eata2x_eh_host_reset(struct scsi_cmnd *SCarg)
 	do_trace = 0;
 
 	if (arg_done)
-		printk("%s: reset, exit, pid %ld done.\n", ha->board_name, SCarg->serial_number);
+		printk("%s: reset, exit, done.\n", ha->board_name);
 	else
 		printk("%s: reset, exit.\n", ha->board_name);
 
@@ -2238,10 +2234,10 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
 			scmd_printk(KERN_INFO, SCpnt,
-			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %u"
+			    "%s mb %d fc %d nr %d sec %ld ns %u"
 			     " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
 			     (ihdlr ? "ihdlr" : "qcomm"),
-			     SCpnt->serial_number, k, flushcount,
+			     k, flushcount,
 			     n_ready, blk_rq_pos(SCpnt->request),
 			     blk_rq_sectors(SCpnt->request), cursec, YESNO(s),
 			     YESNO(r), YESNO(rev), YESNO(input_only),
@@ -2285,10 +2281,10 @@ static void flush_dev(struct scsi_device *dev, unsigned long cursec,
 
 		if (do_dma(dev->host->io_port, cpp->cp_dma_addr, SEND_CP_DMA)) {
 			scmd_printk(KERN_INFO, SCpnt,
-			    "%s, pid %ld, mbox %d, adapter"
+			    "%s, mbox %d, adapter"
 			     " busy, will abort.\n",
 			     (ihdlr ? "ihdlr" : "qcomm"),
-			     SCpnt->serial_number, k);
+			     k);
 			ha->cp_stat[k] = ABORTING;
 			continue;
 		}
@@ -2398,12 +2394,12 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost)
 		panic("%s: ihdlr, mbox %d, SCpnt == NULL.\n", ha->board_name, i);
 
 	if (SCpnt->host_scribble == NULL)
-		panic("%s: ihdlr, mbox %d, pid %ld, SCpnt %p garbled.\n", ha->board_name,
-		      i, SCpnt->serial_number, SCpnt);
+		panic("%s: ihdlr, mbox %d, SCpnt %p garbled.\n", ha->board_name,
+		      i, SCpnt);
 
 	if (*(unsigned int *)SCpnt->host_scribble != i)
-		panic("%s: ihdlr, mbox %d, pid %ld, index mismatch %d.\n",
-		      ha->board_name, i, SCpnt->serial_number,
+		panic("%s: ihdlr, mbox %d, index mismatch %d.\n",
+		      ha->board_name, i,
 		      *(unsigned int *)SCpnt->host_scribble);
 
 	sync_dma(i, ha);
@@ -2449,11 +2445,11 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost)
 		if (spp->target_status && SCpnt->device->type == TYPE_DISK &&
 		    (!(tstatus == CHECK_CONDITION && ha->iocount <= 1000 &&
 		       (SCpnt->sense_buffer[2] & 0xf) == NOT_READY)))
-			printk("%s: ihdlr, target %d.%d:%d, pid %ld, "
+			printk("%s: ihdlr, target %d.%d:%d, "
 			       "target_status 0x%x, sense key 0x%x.\n",
 			       ha->board_name,
 			       SCpnt->device->channel, SCpnt->device->id,
-			       SCpnt->device->lun, SCpnt->serial_number,
+			       SCpnt->device->lun,
 			       spp->target_status, SCpnt->sense_buffer[2]);
 
 		ha->target_to[SCpnt->device->id][SCpnt->device->channel] = 0;
@@ -2522,9 +2518,9 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost)
 	    do_trace || msg_byte(spp->target_status))
 #endif
 		scmd_printk(KERN_INFO, SCpnt, "ihdlr, mbox %2d, err 0x%x:%x,"
-		       " pid %ld, reg 0x%x, count %d.\n",
+		       " reg 0x%x, count %d.\n",
 		       i, spp->adapter_status, spp->target_status,
-		       SCpnt->serial_number, reg, ha->iocount);
+		       reg, ha->iocount);
 
 	unmap_dma(i, ha);
 
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 4a9641e69f54..d5f8362335d3 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -372,8 +372,7 @@ static int eata_pio_queue_lck(struct scsi_cmnd *cmd,
 	cp->status = USED;	/* claim free slot */
 
 	DBG(DBG_QUEUE, scmd_printk(KERN_DEBUG, cmd,
-		"eata_pio_queue pid %ld, y %d\n",
-		cmd->serial_number, y));
+		"eata_pio_queue 0x%p, y %d\n", cmd, y));
 
 	cmd->scsi_done = (void *) done;
 
@@ -417,8 +416,8 @@ static int eata_pio_queue_lck(struct scsi_cmnd *cmd,
 	if (eata_pio_send_command(base, EATA_CMD_PIO_SEND_CP)) {
 		cmd->result = DID_BUS_BUSY << 16;
 		scmd_printk(KERN_NOTICE, cmd,
-			"eata_pio_queue pid %ld, HBA busy, "
-			"returning DID_BUS_BUSY, done.\n", cmd->serial_number);
+			"eata_pio_queue pid 0x%p, HBA busy, "
+			"returning DID_BUS_BUSY, done.\n", cmd);
 		done(cmd);
 		cp->status = FREE;
 		return 0;
@@ -432,8 +431,8 @@ static int eata_pio_queue_lck(struct scsi_cmnd *cmd,
 		outw(0, base + HA_RDATA);
 
 	DBG(DBG_QUEUE, scmd_printk(KERN_DEBUG, cmd,
-		"Queued base %#.4lx pid: %ld "
-		"slot %d irq %d\n", sh->base, cmd->serial_number, y, sh->irq));
+		"Queued base %#.4lx cmd: 0x%p "
+		"slot %d irq %d\n", sh->base, cmd, y, sh->irq));
 
 	return 0;
 }
@@ -445,8 +444,7 @@ static int eata_pio_abort(struct scsi_cmnd *cmd)
 	unsigned int loop = 100;
 
 	DBG(DBG_ABNORM, scmd_printk(KERN_WARNING, cmd,
-		"eata_pio_abort called pid: %ld\n",
-		cmd->serial_number));
+		"eata_pio_abort called pid: 0x%p\n", cmd));
 
 	while (inb(cmd->device->host->base + HA_RAUXSTAT) & HA_ABUSY)
 		if (--loop == 0) {
@@ -481,8 +479,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 	struct Scsi_Host *host = cmd->device->host;
 
 	DBG(DBG_ABNORM, scmd_printk(KERN_WARNING, cmd,
-		"eata_pio_reset called pid:%ld\n",
-		cmd->serial_number));
+		"eata_pio_reset called\n"));
 
 	spin_lock_irq(host->host_lock);
 
@@ -501,7 +498,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 
 		sp = HD(cmd)->ccb[x].cmd;
 		HD(cmd)->ccb[x].status = RESET;
-		printk(KERN_WARNING "eata_pio_reset: slot %d in reset, pid %ld.\n", x, sp->serial_number);
+		printk(KERN_WARNING "eata_pio_reset: slot %d in reset.\n", x);
 
 		if (sp == NULL)
 			panic("eata_pio_reset: slot %d, sp==NULL.\n", x);
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
index 57558523c1b8..9a1af1d6071a 100644
--- a/drivers/scsi/esp_scsi.c
+++ b/drivers/scsi/esp_scsi.c
@@ -708,8 +708,7 @@ static void esp_maybe_execute_command(struct esp *esp)
 	tp = &esp->target[tgt];
 	lp = dev->hostdata;
 
-	list_del(&ent->list);
-	list_add(&ent->list, &esp->active_cmds);
+	list_move(&ent->list, &esp->active_cmds);
 
 	esp->active_cmd = ent;
 
@@ -1244,8 +1243,7 @@ static int esp_finish_select(struct esp *esp)
 		/* Now that the state is unwound properly, put back onto
 		 * the issue queue.  This command is no longer active.
 		 */
-		list_del(&ent->list);
-		list_add(&ent->list, &esp->queued_cmds);
+		list_move(&ent->list, &esp->queued_cmds);
 		esp->active_cmd = NULL;
 
 		/* Return value ignored by caller, it directly invokes
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 04f346b562da..cc23bd9480b2 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -381,6 +381,42 @@ out:
 }
 
 /**
+ * fcoe_interface_release() - fcoe_interface kref release function
+ * @kref: Embedded reference count in an fcoe_interface struct
+ */
+static void fcoe_interface_release(struct kref *kref)
+{
+	struct fcoe_interface *fcoe;
+	struct net_device *netdev;
+
+	fcoe = container_of(kref, struct fcoe_interface, kref);
+	netdev = fcoe->netdev;
+	/* tear-down the FCoE controller */
+	fcoe_ctlr_destroy(&fcoe->ctlr);
+	kfree(fcoe);
+	dev_put(netdev);
+	module_put(THIS_MODULE);
+}
+
+/**
+ * fcoe_interface_get() - Get a reference to a FCoE interface
+ * @fcoe: The FCoE interface to be held
+ */
+static inline void fcoe_interface_get(struct fcoe_interface *fcoe)
+{
+	kref_get(&fcoe->kref);
+}
+
+/**
+ * fcoe_interface_put() - Put a reference to a FCoE interface
+ * @fcoe: The FCoE interface to be released
+ */
+static inline void fcoe_interface_put(struct fcoe_interface *fcoe)
+{
+	kref_put(&fcoe->kref, fcoe_interface_release);
+}
+
+/**
  * fcoe_interface_cleanup() - Clean up a FCoE interface
  * @fcoe: The FCoE interface to be cleaned up
  *
@@ -392,6 +428,21 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	struct fcoe_ctlr *fip = &fcoe->ctlr;
 	u8 flogi_maddr[ETH_ALEN];
 	const struct net_device_ops *ops;
+	struct fcoe_port *port = lport_priv(fcoe->ctlr.lp);
+
+	FCOE_NETDEV_DBG(netdev, "Destroying interface\n");
+
+	/* Logout of the fabric */
+	fc_fabric_logoff(fcoe->ctlr.lp);
+
+	/* Cleanup the fc_lport */
+	fc_lport_destroy(fcoe->ctlr.lp);
+
+	/* Stop the transmit retry timer */
+	del_timer_sync(&port->timer);
+
+	/* Free existing transmit skbs */
+	fcoe_clean_pending_queue(fcoe->ctlr.lp);
 
 	/*
 	 * Don't listen for Ethernet packets anymore.
@@ -414,6 +465,9 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	} else
 		dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
 
+	if (!is_zero_ether_addr(port->data_src_addr))
+		dev_uc_del(netdev, port->data_src_addr);
+
 	/* Tell the LLD we are done w/ FCoE */
 	ops = netdev->netdev_ops;
 	if (ops->ndo_fcoe_disable) {
@@ -421,42 +475,7 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 			FCOE_NETDEV_DBG(netdev, "Failed to disable FCoE"
 					" specific feature for LLD.\n");
 	}
-}
-
-/**
- * fcoe_interface_release() - fcoe_port kref release function
- * @kref: Embedded reference count in an fcoe_interface struct
- */
-static void fcoe_interface_release(struct kref *kref)
-{
-	struct fcoe_interface *fcoe;
-	struct net_device *netdev;
-
-	fcoe = container_of(kref, struct fcoe_interface, kref);
-	netdev = fcoe->netdev;
-	/* tear-down the FCoE controller */
-	fcoe_ctlr_destroy(&fcoe->ctlr);
-	kfree(fcoe);
-	dev_put(netdev);
-	module_put(THIS_MODULE);
-}
-
-/**
- * fcoe_interface_get() - Get a reference to a FCoE interface
- * @fcoe: The FCoE interface to be held
- */
-static inline void fcoe_interface_get(struct fcoe_interface *fcoe)
-{
-	kref_get(&fcoe->kref);
-}
-
-/**
- * fcoe_interface_put() - Put a reference to a FCoE interface
- * @fcoe: The FCoE interface to be released
- */
-static inline void fcoe_interface_put(struct fcoe_interface *fcoe)
-{
-	kref_put(&fcoe->kref, fcoe_interface_release);
+	fcoe_interface_put(fcoe);
 }
 
 /**
@@ -821,39 +840,9 @@ skip_oem:
  * fcoe_if_destroy() - Tear down a SW FCoE instance
  * @lport: The local port to be destroyed
  *
- * Locking: must be called with the RTNL mutex held and RTNL mutex
- * needed to be dropped by this function since not dropping RTNL
- * would cause circular locking warning on synchronous fip worker
- * cancelling thru fcoe_interface_put invoked by this function.
- *
  */
 static void fcoe_if_destroy(struct fc_lport *lport)
 {
-	struct fcoe_port *port = lport_priv(lport);
-	struct fcoe_interface *fcoe = port->priv;
-	struct net_device *netdev = fcoe->netdev;
-
-	FCOE_NETDEV_DBG(netdev, "Destroying interface\n");
-
-	/* Logout of the fabric */
-	fc_fabric_logoff(lport);
-
-	/* Cleanup the fc_lport */
-	fc_lport_destroy(lport);
-
-	/* Stop the transmit retry timer */
-	del_timer_sync(&port->timer);
-
-	/* Free existing transmit skbs */
-	fcoe_clean_pending_queue(lport);
-
-	if (!is_zero_ether_addr(port->data_src_addr))
-		dev_uc_del(netdev, port->data_src_addr);
-	rtnl_unlock();
-
-	/* receives may not be stopped until after this */
-	fcoe_interface_put(fcoe);
-
 	/* Free queued packets for the per-CPU receive threads */
 	fcoe_percpu_clean(lport);
 
@@ -1783,23 +1772,8 @@ static int fcoe_disable(struct net_device *netdev)
 	int rc = 0;
 
 	mutex_lock(&fcoe_config_mutex);
-#ifdef CONFIG_FCOE_MODULE
-	/*
-	 * Make sure the module has been initialized, and is not about to be
-	 * removed.  Module paramter sysfs files are writable before the
-	 * module_init function is called and after module_exit.
-	 */
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto out_nodev;
-	}
-#endif
-
-	if (!rtnl_trylock()) {
-		mutex_unlock(&fcoe_config_mutex);
-		return -ERESTARTSYS;
-	}
 
+	rtnl_lock();
 	fcoe = fcoe_hostlist_lookup_port(netdev);
 	rtnl_unlock();
 
@@ -1809,7 +1783,6 @@ static int fcoe_disable(struct net_device *netdev)
 	} else
 		rc = -ENODEV;
 
-out_nodev:
 	mutex_unlock(&fcoe_config_mutex);
 	return rc;
 }
@@ -1828,22 +1801,7 @@ static int fcoe_enable(struct net_device *netdev)
 	int rc = 0;
 
 	mutex_lock(&fcoe_config_mutex);
-#ifdef CONFIG_FCOE_MODULE
-	/*
-	 * Make sure the module has been initialized, and is not about to be
-	 * removed.  Module paramter sysfs files are writable before the
-	 * module_init function is called and after module_exit.
-	 */
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto out_nodev;
-	}
-#endif
-	if (!rtnl_trylock()) {
-		mutex_unlock(&fcoe_config_mutex);
-		return -ERESTARTSYS;
-	}
-
+	rtnl_lock();
 	fcoe = fcoe_hostlist_lookup_port(netdev);
 	rtnl_unlock();
 
@@ -1852,7 +1810,6 @@ static int fcoe_enable(struct net_device *netdev)
 	else if (!fcoe_link_ok(fcoe->ctlr.lp))
 		fcoe_ctlr_link_up(&fcoe->ctlr);
 
-out_nodev:
 	mutex_unlock(&fcoe_config_mutex);
 	return rc;
 }
@@ -1868,35 +1825,22 @@ out_nodev:
 static int fcoe_destroy(struct net_device *netdev)
 {
 	struct fcoe_interface *fcoe;
+	struct fc_lport *lport;
 	int rc = 0;
 
 	mutex_lock(&fcoe_config_mutex);
-#ifdef CONFIG_FCOE_MODULE
-	/*
-	 * Make sure the module has been initialized, and is not about to be
-	 * removed.  Module paramter sysfs files are writable before the
-	 * module_init function is called and after module_exit.
-	 */
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto out_nodev;
-	}
-#endif
-	if (!rtnl_trylock()) {
-		mutex_unlock(&fcoe_config_mutex);
-		return -ERESTARTSYS;
-	}
-
+	rtnl_lock();
 	fcoe = fcoe_hostlist_lookup_port(netdev);
 	if (!fcoe) {
 		rtnl_unlock();
 		rc = -ENODEV;
 		goto out_nodev;
 	}
-	fcoe_interface_cleanup(fcoe);
+	lport = fcoe->ctlr.lp;
 	list_del(&fcoe->list);
-	/* RTNL mutex is dropped by fcoe_if_destroy */
-	fcoe_if_destroy(fcoe->ctlr.lp);
+	fcoe_interface_cleanup(fcoe);
+	rtnl_unlock();
+	fcoe_if_destroy(lport);
 out_nodev:
 	mutex_unlock(&fcoe_config_mutex);
 	return rc;
@@ -1912,8 +1856,6 @@ static void fcoe_destroy_work(struct work_struct *work)
 
 	port = container_of(work, struct fcoe_port, destroy_work);
 	mutex_lock(&fcoe_config_mutex);
-	rtnl_lock();
-	/* RTNL mutex is dropped by fcoe_if_destroy */
 	fcoe_if_destroy(port->lport);
 	mutex_unlock(&fcoe_config_mutex);
 }
@@ -1948,23 +1890,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
 	struct fc_lport *lport;
 
 	mutex_lock(&fcoe_config_mutex);
-
-	if (!rtnl_trylock()) {
-		mutex_unlock(&fcoe_config_mutex);
-		return -ERESTARTSYS;
-	}
-
-#ifdef CONFIG_FCOE_MODULE
-	/*
-	 * Make sure the module has been initialized, and is not about to be
-	 * removed.  Module paramter sysfs files are writable before the
-	 * module_init function is called and after module_exit.
-	 */
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto out_nodev;
-	}
-#endif
+	rtnl_lock();
 
 	/* look for existing lport */
 	if (fcoe_hostlist_lookup(netdev)) {
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 9d38be2a41f9..229e4af5508a 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -978,10 +978,8 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	 * the FCF that answers multicast solicitations, not the others that
 	 * are sending periodic multicast advertisements.
 	 */
-	if (mtu_valid) {
-		list_del(&fcf->list);
-		list_add(&fcf->list, &fip->fcfs);
-	}
+	if (mtu_valid)
+		list_move(&fcf->list, &fip->fcfs);
 
 	/*
 	 * If this is the first validated FCF, note the time and
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index 258684101bfd..f81f77c8569e 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -335,7 +335,7 @@ out_attach:
 EXPORT_SYMBOL(fcoe_transport_attach);
 
 /**
- * fcoe_transport_attach - Detaches an FCoE transport
+ * fcoe_transport_detach - Detaches an FCoE transport
  * @ft: The fcoe transport to be attached
  *
  * Returns : 0 for success
@@ -343,6 +343,7 @@ EXPORT_SYMBOL(fcoe_transport_attach);
 int fcoe_transport_detach(struct fcoe_transport *ft)
 {
 	int rc = 0;
+	struct fcoe_netdev_mapping *nm = NULL, *tmp;
 
 	mutex_lock(&ft_mutex);
 	if (!ft->attached) {
@@ -352,6 +353,19 @@ int fcoe_transport_detach(struct fcoe_transport *ft)
 		goto out_attach;
 	}
 
+	/* remove netdev mapping for this transport as it is going away */
+	mutex_lock(&fn_mutex);
+	list_for_each_entry_safe(nm, tmp, &fcoe_netdevs, list) {
+		if (nm->ft == ft) {
+			LIBFCOE_TRANSPORT_DBG("transport %s going away, "
+				"remove its netdev mapping for %s\n",
+				ft->name, nm->netdev->name);
+			list_del(&nm->list);
+			kfree(nm);
+		}
+	}
+	mutex_unlock(&fn_mutex);
+
 	list_del(&ft->list);
 	ft->attached = false;
 	LIBFCOE_TRANSPORT_DBG("detaching transport %s\n", ft->name);
@@ -371,9 +385,9 @@ static int fcoe_transport_show(char *buffer, const struct kernel_param *kp)
 	i = j = sprintf(buffer, "Attached FCoE transports:");
 	mutex_lock(&ft_mutex);
 	list_for_each_entry(ft, &fcoe_transports, list) {
-		i += snprintf(&buffer[i], IFNAMSIZ, "%s ", ft->name);
-		if (i >= PAGE_SIZE)
+		if (i >= PAGE_SIZE - IFNAMSIZ)
 			break;
+		i += snprintf(&buffer[i], IFNAMSIZ, "%s ", ft->name);
 	}
 	mutex_unlock(&ft_mutex);
 	if (i == j)
@@ -530,9 +544,6 @@ static int fcoe_transport_create(const char *buffer, struct kernel_param *kp)
 	struct fcoe_transport *ft = NULL;
 	enum fip_state fip_mode = (enum fip_state)(long)kp->arg;
 
-	if (!mutex_trylock(&ft_mutex))
-		return restart_syscall();
-
 #ifdef CONFIG_LIBFCOE_MODULE
 	/*
 	 * Make sure the module has been initialized, and is not about to be
@@ -543,6 +554,8 @@ static int fcoe_transport_create(const char *buffer, struct kernel_param *kp)
 		goto out_nodev;
 #endif
 
+	mutex_lock(&ft_mutex);
+
 	netdev = fcoe_if_to_netdev(buffer);
 	if (!netdev) {
 		LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buffer);
@@ -586,10 +599,7 @@ out_putdev:
 	dev_put(netdev);
 out_nodev:
 	mutex_unlock(&ft_mutex);
-	if (rc == -ERESTARTSYS)
-		return restart_syscall();
-	else
-		return rc;
+	return rc;
 }
 
 /**
@@ -608,9 +618,6 @@ static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp)
 	struct net_device *netdev = NULL;
 	struct fcoe_transport *ft = NULL;
 
-	if (!mutex_trylock(&ft_mutex))
-		return restart_syscall();
-
 #ifdef CONFIG_LIBFCOE_MODULE
 	/*
 	 * Make sure the module has been initialized, and is not about to be
@@ -621,6 +628,8 @@ static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp)
 		goto out_nodev;
 #endif
 
+	mutex_lock(&ft_mutex);
+
 	netdev = fcoe_if_to_netdev(buffer);
 	if (!netdev) {
 		LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buffer);
@@ -645,11 +654,7 @@ out_putdev:
 	dev_put(netdev);
 out_nodev:
 	mutex_unlock(&ft_mutex);
-
-	if (rc == -ERESTARTSYS)
-		return restart_syscall();
-	else
-		return rc;
+	return rc;
 }
 
 /**
@@ -667,9 +672,6 @@ static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp)
 	struct net_device *netdev = NULL;
 	struct fcoe_transport *ft = NULL;
 
-	if (!mutex_trylock(&ft_mutex))
-		return restart_syscall();
-
 #ifdef CONFIG_LIBFCOE_MODULE
 	/*
 	 * Make sure the module has been initialized, and is not about to be
@@ -680,6 +682,8 @@ static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp)
 		goto out_nodev;
 #endif
 
+	mutex_lock(&ft_mutex);
+
 	netdev = fcoe_if_to_netdev(buffer);
 	if (!netdev)
 		goto out_nodev;
@@ -716,9 +720,6 @@ static int fcoe_transport_enable(const char *buffer, struct kernel_param *kp)
 	struct net_device *netdev = NULL;
 	struct fcoe_transport *ft = NULL;
 
-	if (!mutex_trylock(&ft_mutex))
-		return restart_syscall();
-
 #ifdef CONFIG_LIBFCOE_MODULE
 	/*
 	 * Make sure the module has been initialized, and is not about to be
@@ -729,6 +730,8 @@ static int fcoe_transport_enable(const char *buffer, struct kernel_param *kp)
 		goto out_nodev;
 #endif
 
+	mutex_lock(&ft_mutex);
+
 	netdev = fcoe_if_to_netdev(buffer);
 	if (!netdev)
 		goto out_nodev;
@@ -743,10 +746,7 @@ out_putdev:
 	dev_put(netdev);
 out_nodev:
 	mutex_unlock(&ft_mutex);
-	if (rc == -ERESTARTSYS)
-		return restart_syscall();
-	else
-		return rc;
+	return rc;
 }
 
 /**
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 415ad4fb50d4..c6c0434d8034 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -273,7 +273,7 @@ static ssize_t host_show_transport_mode(struct device *dev,
 			"performant" : "simple");
 }
 
-/* List of controllers which cannot be reset on kexec with reset_devices */
+/* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
 	0x324a103C, /* Smart Array P712m */
 	0x324b103C, /* SmartArray P711m */
@@ -291,16 +291,45 @@ static u32 unresettable_controller[] = {
 	0x409D0E11, /* Smart Array 6400 EM */
 };
 
-static int ctlr_is_resettable(struct ctlr_info *h)
+/* List of controllers which cannot even be soft reset */
+static u32 soft_unresettable_controller[] = {
+	/* Exclude 640x boards.  These are two pci devices in one slot
+	 * which share a battery backed cache module.  One controls the
+	 * cache, the other accesses the cache through the one that controls
+	 * it.  If we reset the one controlling the cache, the other will
+	 * likely not be happy.  Just forbid resetting this conjoined mess.
+	 * The 640x isn't really supported by hpsa anyway.
+	 */
+	0x409C0E11, /* Smart Array 6400 */
+	0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_hard_resettable(u32 board_id)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-		if (unresettable_controller[i] == h->board_id)
+		if (unresettable_controller[i] == board_id)
+			return 0;
+	return 1;
+}
+
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
+		if (soft_unresettable_controller[i] == board_id)
 			return 0;
 	return 1;
 }
 
+static int ctlr_is_resettable(u32 board_id)
+{
+	return ctlr_is_hard_resettable(board_id) ||
+		ctlr_is_soft_resettable(board_id);
+}
+
 static ssize_t host_show_resettable(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
@@ -308,7 +337,7 @@ static ssize_t host_show_resettable(struct device *dev,
 	struct Scsi_Host *shost = class_to_shost(dev);
 
 	h = shost_to_hba(shost);
-	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
 }
 
 static inline int is_logical_dev_addr_mode(unsigned char scsi3addr[])
@@ -929,13 +958,6 @@ static void hpsa_slave_destroy(struct scsi_device *sdev)
 	/* nothing to do. */
 }
 
-static void hpsa_scsi_setup(struct ctlr_info *h)
-{
-	h->ndevices = 0;
-	h->scsi_host = NULL;
-	spin_lock_init(&h->devlock);
-}
-
 static void hpsa_free_sg_chain_blocks(struct ctlr_info *h)
 {
 	int i;
@@ -1006,8 +1028,7 @@ static void hpsa_unmap_sg_chain_block(struct ctlr_info *h,
 	pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE);
 }
 
-static void complete_scsi_command(struct CommandList *cp,
-	int timeout, u32 tag)
+static void complete_scsi_command(struct CommandList *cp)
 {
 	struct scsi_cmnd *cmd;
 	struct ctlr_info *h;
@@ -1308,7 +1329,7 @@ static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
 	int retry_count = 0;
 
 	do {
-		memset(c->err_info, 0, sizeof(c->err_info));
+		memset(c->err_info, 0, sizeof(*c->err_info));
 		hpsa_scsi_do_simple_cmd_core(h, c);
 		retry_count++;
 	} while (check_for_unit_attention(h, c) && retry_count <= 3);
@@ -1570,6 +1591,7 @@ static unsigned char *msa2xxx_model[] = {
 	"MSA2024",
 	"MSA2312",
 	"MSA2324",
+	"P2000 G3 SAS",
 	NULL,
 };
 
@@ -2751,6 +2773,26 @@ static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg)
 	}
 }
 
+static int __devinit hpsa_send_host_reset(struct ctlr_info *h,
+	unsigned char *scsi3addr, u8 reset_type)
+{
+	struct CommandList *c;
+
+	c = cmd_alloc(h);
+	if (!c)
+		return -ENOMEM;
+	fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
+		RAID_CTLR_LUNID, TYPE_MSG);
+	c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
+	c->waiting = NULL;
+	enqueue_cmd_and_start_io(h, c);
+	/* Don't wait for completion, the reset won't complete.  Don't free
+	 * the command either.  This is the last command we will send before
+	 * re-initializing everything, so it doesn't matter and won't leak.
+	 */
+	return 0;
+}
+
 static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	void *buff, size_t size, u8 page_code, unsigned char *scsi3addr,
 	int cmd_type)
@@ -2828,7 +2870,8 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_NONE;
 			c->Request.Timeout = 0; /* Don't time out */
-			c->Request.CDB[0] =  0x01; /* RESET_MSG is 0x01 */
+			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
+			c->Request.CDB[0] =  cmd;
 			c->Request.CDB[1] = 0x03;  /* Reset target above */
 			/* If bytes 4-7 are zero, it means reset the */
 			/* LunID device */
@@ -2936,7 +2979,7 @@ static inline void finish_cmd(struct CommandList *c, u32 raw_tag)
 {
 	removeQ(c);
 	if (likely(c->cmd_type == CMD_SCSI))
-		complete_scsi_command(c, 0, raw_tag);
+		complete_scsi_command(c);
 	else if (c->cmd_type == CMD_IOCTL_PEND)
 		complete(c->waiting);
 }
@@ -2994,6 +3037,63 @@ static inline u32 process_nonindexed_cmd(struct ctlr_info *h,
 	return next_command(h);
 }
 
+/* Some controllers, like p400, will give us one interrupt
+ * after a soft reset, even if we turned interrupts off.
+ * Only need to check for this in the hpsa_xxx_discard_completions
+ * functions.
+ */
+static int ignore_bogus_interrupt(struct ctlr_info *h)
+{
+	if (likely(!reset_devices))
+		return 0;
+
+	if (likely(h->interrupts_enabled))
+		return 0;
+
+	dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
+		"(known firmware bug.)  Ignoring.\n");
+
+	return 1;
+}
+
+static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id)
+{
+	struct ctlr_info *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	if (interrupt_not_for_us(h))
+		return IRQ_NONE;
+	spin_lock_irqsave(&h->lock, flags);
+	while (interrupt_pending(h)) {
+		raw_tag = get_next_completion(h);
+		while (raw_tag != FIFO_EMPTY)
+			raw_tag = next_command(h);
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t hpsa_msix_discard_completions(int irq, void *dev_id)
+{
+	struct ctlr_info *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&h->lock, flags);
+	raw_tag = get_next_completion(h);
+	while (raw_tag != FIFO_EMPTY)
+		raw_tag = next_command(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id)
 {
 	struct ctlr_info *h = dev_id;
@@ -3132,11 +3232,10 @@ static __devinit int hpsa_message(struct pci_dev *pdev, unsigned char opcode,
 	return 0;
 }
 
-#define hpsa_soft_reset_controller(p) hpsa_message(p, 1, 0)
 #define hpsa_noop(p) hpsa_message(p, 3, 0)
 
 static int hpsa_controller_hard_reset(struct pci_dev *pdev,
-	void * __iomem vaddr, bool use_doorbell)
+	void * __iomem vaddr, u32 use_doorbell)
 {
 	u16 pmcsr;
 	int pos;
@@ -3147,8 +3246,7 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
 		 * other way using the doorbell register.
 		 */
 		dev_info(&pdev->dev, "using doorbell to reset controller\n");
-		writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
-		msleep(1000);
+		writel(use_doorbell, vaddr + SA5_DOORBELL);
 	} else { /* Try to do it the PCI power state way */
 
 		/* Quoting from the Open CISS Specification: "The Power
@@ -3179,12 +3277,63 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
 		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
 		pmcsr |= PCI_D0;
 		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
-		msleep(500);
 	}
 	return 0;
 }
 
+static __devinit void init_driver_version(char *driver_version, int len)
+{
+	memset(driver_version, 0, len);	/* zero all len bytes up front */
+	strncpy(driver_version, "hpsa " HPSA_DRIVER_VERSION, len - 1);
+}	/* at most len - 1 bytes are copied, so the last byte stays NUL */
+
+/* Write "hpsa <driver version>", zero padded, into the cfgtable
+ * driver_version field one byte at a time.  The staging buffer is only
+ * sizeof(cfgtable->driver_version) bytes, so it lives on the stack and
+ * nothing here can fail.  Always returns 0 (int kept for the callers).
+ */
+static __devinit int write_driver_ver_to_cfgtable(
+	struct CfgTable __iomem *cfgtable)
+{
+	char driver_version[sizeof(cfgtable->driver_version)];
+	int i;
+
+	init_driver_version(driver_version, sizeof(driver_version));
+	for (i = 0; i < sizeof(driver_version); i++)
+		writeb(driver_version[i], &cfgtable->driver_version[i]);
+	return 0;
+}
+
+static __devinit void read_driver_ver_from_cfgtable(
+	struct CfgTable __iomem *cfgtable, unsigned char *driver_ver)
+{
+	int i;	/* driver_ver needs sizeof(cfgtable->driver_version) bytes */
+
+	for (i = 0; i < sizeof(cfgtable->driver_version); i++)
+		driver_ver[i] = readb(&cfgtable->driver_version[i]);
+}
+
+/* Report whether a controller reset failed to take effect.
+ *
+ * The driver writes its version string into the cfgtable driver_version
+ * field beforehand; after a reset those bytes should have been changed.
+ * Returns 1 when the field still matches our string (reset failed), else 0.
+ * The two scratch buffers are only sizeof(cfgtable->driver_version) bytes
+ * each, so they live on the stack and no allocation failure is possible.
+ * (Callers' existing checks for a negative return remain harmless.)
+ */
+static __devinit int controller_reset_failed(
+	struct CfgTable __iomem *cfgtable)
+{
+	char expected[sizeof(cfgtable->driver_version)];
+	unsigned char seen[sizeof(cfgtable->driver_version)];
+
+	init_driver_version(expected, sizeof(expected));
+	read_driver_ver_from_cfgtable(cfgtable, seen);
+	/* Identical bytes mean the controller never rewrote the field. */
+	return !memcmp(seen, expected, sizeof(expected));
+}
+
 /* This does a hard reset of the controller using PCI power management
  * states or the using the doorbell register.
  */
@@ -3195,10 +3344,10 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 	u64 cfg_base_addr_index;
 	void __iomem *vaddr;
 	unsigned long paddr;
-	u32 misc_fw_support, active_transport;
+	u32 misc_fw_support;
 	int rc;
 	struct CfgTable __iomem *cfgtable;
-	bool use_doorbell;
+	u32 use_doorbell;
 	u32 board_id;
 	u16 command_register;
 
@@ -3215,20 +3364,15 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 	 * using the doorbell register.
 	 */
 
-	/* Exclude 640x boards.  These are two pci devices in one slot
-	 * which share a battery backed cache module.  One controls the
-	 * cache, the other accesses the cache through the one that controls
-	 * it.  If we reset the one controlling the cache, the other will
-	 * likely not be happy.  Just forbid resetting this conjoined mess.
-	 * The 640x isn't really supported by hpsa anyway.
-	 */
 	rc = hpsa_lookup_board_id(pdev, &board_id);
-	if (rc < 0) {
+	if (rc < 0 || !ctlr_is_resettable(board_id)) {
 		dev_warn(&pdev->dev, "Not resetting device.\n");
 		return -ENODEV;
 	}
-	if (board_id == 0x409C0E11 || board_id == 0x409D0E11)
-		return -ENOTSUPP;
+
+	/* if controller is soft- but not hard resettable... */
+	if (!ctlr_is_hard_resettable(board_id))
+		return -ENOTSUPP; /* try soft reset later. */
 
 	/* Save the PCI command register */
 	pci_read_config_word(pdev, 4, &command_register);
@@ -3257,10 +3401,28 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 		rc = -ENOMEM;
 		goto unmap_vaddr;
 	}
+	rc = write_driver_ver_to_cfgtable(cfgtable);
+	if (rc)
+		goto unmap_vaddr;
 
-	/* If reset via doorbell register is supported, use that. */
+	/* If reset via doorbell register is supported, use that.
+	 * There are two such methods.  Favor the newest method.
+	 */
 	misc_fw_support = readl(&cfgtable->misc_fw_support);
-	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
+	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
+	if (use_doorbell) {
+		use_doorbell = DOORBELL_CTLR_RESET2;
+	} else {
+		use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
+		if (use_doorbell) {
+			dev_warn(&pdev->dev, "Controller claims that "
+				"'Bit 2 doorbell reset' is "
+				"supported, but not 'bit 5 doorbell reset'.  "
+				"Firmware update is recommended.\n");
+			rc = -ENOTSUPP; /* try soft reset */
+			goto unmap_cfgtable;
+		}
+	}
 
 	rc = hpsa_controller_hard_reset(pdev, vaddr, use_doorbell);
 	if (rc)
@@ -3279,30 +3441,32 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 	msleep(HPSA_POST_RESET_PAUSE_MSECS);
 
 	/* Wait for board to become not ready, then ready. */
-	dev_info(&pdev->dev, "Waiting for board to become ready.\n");
+	dev_info(&pdev->dev, "Waiting for board to reset.\n");
 	rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
-	if (rc)
+	if (rc) {
 		dev_warn(&pdev->dev,
-			"failed waiting for board to become not ready\n");
+			"failed waiting for board to reset."
+			" Will try soft reset.\n");
+		rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+		goto unmap_cfgtable;
+	}
 	rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_READY);
 	if (rc) {
 		dev_warn(&pdev->dev,
-			"failed waiting for board to become ready\n");
+			"failed waiting for board to become ready "
+			"after hard reset\n");
 		goto unmap_cfgtable;
 	}
-	dev_info(&pdev->dev, "board ready.\n");
 
-	/* Controller should be in simple mode at this point.  If it's not,
-	 * It means we're on one of those controllers which doesn't support
-	 * the doorbell reset method and on which the PCI power management reset
-	 * method doesn't work (P800, for example.)
-	 * In those cases, don't try to proceed, as it generally doesn't work.
-	 */
-	active_transport = readl(&cfgtable->TransportActive);
-	if (active_transport & PERFORMANT_MODE) {
-		dev_warn(&pdev->dev, "Unable to successfully reset controller,"
-			" Ignoring controller.\n");
-		rc = -ENODEV;
+	rc = controller_reset_failed(vaddr);
+	if (rc < 0)
+		goto unmap_cfgtable;
+	if (rc) {
+		dev_warn(&pdev->dev, "Unable to successfully reset "
+			"controller. Will try soft reset.\n");
+		rc = -ENOTSUPP;
+	} else {
+		dev_info(&pdev->dev, "board ready after hard reset.\n");
 	}
 
 unmap_cfgtable:
@@ -3543,6 +3707,9 @@ static int __devinit hpsa_find_cfgtables(struct ctlr_info *h)
 		       cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
 	if (!h->cfgtable)
 		return -ENOMEM;
+	rc = write_driver_ver_to_cfgtable(h->cfgtable);
+	if (rc)
+		return rc;
 	/* Find performant mode table. */
 	trans_offset = readl(&h->cfgtable->TransMethodOffset);
 	h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
@@ -3777,11 +3944,12 @@ static __devinit int hpsa_init_reset_devices(struct pci_dev *pdev)
 	 * due to concerns about shared bbwc between 6402/6404 pair.
 	 */
 	if (rc == -ENOTSUPP)
-		return 0; /* just try to do the kdump anyhow. */
+		return rc; /* just try to do the kdump anyhow. */
 	if (rc)
 		return -ENODEV;
 
 	/* Now try to get the controller to respond to a no-op */
+	dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
 	for (i = 0; i < HPSA_POST_RESET_NOOP_RETRIES; i++) {
 		if (hpsa_noop(pdev) == 0)
 			break;
@@ -3792,18 +3960,133 @@ static __devinit int hpsa_init_reset_devices(struct pci_dev *pdev)
 	return 0;
 }
 
+/* Allocate the command bitmap and the consistent-DMA command/error-info pools.
+ * Returns 0 or -ENOMEM; on failure the caller unwinds via hpsa_free_cmd_pool().
+ */
+static __devinit int hpsa_allocate_cmd_pool(struct ctlr_info *h)
+{
+	h->cmd_pool_bits = kzalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+		sizeof(unsigned long), GFP_KERNEL);
+	h->cmd_pool = pci_alloc_consistent(h->pdev,
+		    h->nr_cmds * sizeof(*h->cmd_pool),
+		    &(h->cmd_pool_dhandle));
+	h->errinfo_pool = pci_alloc_consistent(h->pdev,
+		    h->nr_cmds * sizeof(*h->errinfo_pool),
+		    &(h->errinfo_pool_dhandle));
+	if (!h->cmd_pool_bits || !h->cmd_pool || !h->errinfo_pool) {
+		dev_err(&h->pdev->dev, "out of memory in %s\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Release everything hpsa_allocate_cmd_pool() may have set up. */
+static void hpsa_free_cmd_pool(struct ctlr_info *h)
+{
+	const size_t n = h->nr_cmds;
+
+	if (h->errinfo_pool)
+		pci_free_consistent(h->pdev, n * sizeof(struct ErrorInfo),
+			    h->errinfo_pool, h->errinfo_pool_dhandle);
+	if (h->cmd_pool)
+		pci_free_consistent(h->pdev, n * sizeof(struct CommandList),
+			    h->cmd_pool, h->cmd_pool_dhandle);
+	kfree(h->cmd_pool_bits);
+}
+
+/* Register msixhandler when MSI or MSI-X is active, otherwise intxhandler.
+ * Any request_irq() failure is logged and reported as -ENODEV.
+ */
+static int hpsa_request_irq(struct ctlr_info *h,
+	irqreturn_t (*msixhandler)(int, void *),
+	irqreturn_t (*intxhandler)(int, void *))
+{
+	const int msi_active = h->msix_vector || h->msi_vector;
+	irqreturn_t (*handler)(int, void *);
+
+	handler = msi_active ? msixhandler : intxhandler;
+	if (!request_irq(h->intr[h->intr_mode], handler,
+			IRQF_DISABLED, h->devname, h))
+		return 0;
+
+	dev_err(&h->pdev->dev, "unable to get irq %d for %s\n",
+	       h->intr[h->intr_mode], h->devname);
+	return -ENODEV;
+}
+
+/* kdump soft reset: request a controller reset via hpsa_send_host_reset(), then
+ * wait for BOARD_NOT_READY and BOARD_READY.  Returns 0, -EIO (send failed) or -1.
+ */
+static int __devinit hpsa_kdump_soft_reset(struct ctlr_info *h)
+{
+	struct device *dev = &h->pdev->dev;
+
+	if (hpsa_send_host_reset(h, RAID_CTLR_LUNID, HPSA_RESET_TYPE_CONTROLLER)) {
+		dev_warn(dev, "Resetting array controller failed.\n");
+		return -EIO;
+	}
+	dev_info(dev, "Waiting for board to soft reset.\n");
+	if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
+		dev_warn(dev, "Soft reset had no effect.\n");
+		return -1;
+	}
+	dev_info(dev, "Board reset, awaiting READY status.\n");
+	if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
+		dev_warn(dev, "Board failed to become ready after soft reset.\n");
+		return -1;
+	}
+	return 0;
+}
+
+static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
+{
+	free_irq(h->intr[h->intr_mode], h);	/* handler was registered with h */
+#ifdef CONFIG_PCI_MSI
+	if (h->msix_vector)	/* MSI-X is checked before plain MSI */
+		pci_disable_msix(h->pdev);
+	else if (h->msi_vector)
+		pci_disable_msi(h->pdev);
+#endif /* CONFIG_PCI_MSI */
+	hpsa_free_sg_chain_blocks(h);
+	hpsa_free_cmd_pool(h);
+	kfree(h->blockFetchTable);
+	pci_free_consistent(h->pdev, h->reply_pool_size,
+		h->reply_pool, h->reply_pool_dhandle);
+	if (h->vaddr)	/* each mapping is only unmapped when non-NULL */
+		iounmap(h->vaddr);
+	if (h->transtable)
+		iounmap(h->transtable);
+	if (h->cfgtable)
+		iounmap(h->cfgtable);
+	pci_release_regions(h->pdev);
+	kfree(h);	/* h itself is freed; it must not be used after return */
+}
+
 static int __devinit hpsa_init_one(struct pci_dev *pdev,
 				    const struct pci_device_id *ent)
 {
 	int dac, rc;
 	struct ctlr_info *h;
+	int try_soft_reset = 0;
+	unsigned long flags;
 
 	if (number_of_controllers == 0)
 		printk(KERN_INFO DRIVER_NAME "\n");
 
 	rc = hpsa_init_reset_devices(pdev);
-	if (rc)
-		return rc;
+	if (rc) {
+		if (rc != -ENOTSUPP)
+			return rc;
+		/* If the reset fails in a particular way (it has no way to do
+		 * a proper hard reset, so returns -ENOTSUPP) we can try to do
+		 * a soft reset once we get the controller configured up to the
+		 * point that it can accept a command.
+		 */
+		try_soft_reset = 1;
+		rc = 0;
+	}
+
+reinit_after_soft_reset:
 
 	/* Command structures must be aligned on a 32-byte boundary because
 	 * the 5 lower bits of the address are used by the hardware. and by
@@ -3847,54 +4130,82 @@ static int __devinit hpsa_init_one(struct pci_dev *pdev,
 	/* make sure the board interrupts are off */
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
 
-	if (h->msix_vector || h->msi_vector)
-		rc = request_irq(h->intr[h->intr_mode], do_hpsa_intr_msi,
-				IRQF_DISABLED, h->devname, h);
-	else
-		rc = request_irq(h->intr[h->intr_mode], do_hpsa_intr_intx,
-				IRQF_DISABLED, h->devname, h);
-	if (rc) {
-		dev_err(&pdev->dev, "unable to get irq %d for %s\n",
-		       h->intr[h->intr_mode], h->devname);
+	if (hpsa_request_irq(h, do_hpsa_intr_msi, do_hpsa_intr_intx))
 		goto clean2;
-	}
-
 	dev_info(&pdev->dev, "%s: <0x%x> at IRQ %d%s using DAC\n",
 	       h->devname, pdev->device,
 	       h->intr[h->intr_mode], dac ? "" : " not");
-
-	h->cmd_pool_bits =
-	    kmalloc(((h->nr_cmds + BITS_PER_LONG -
-		      1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL);
-	h->cmd_pool = pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(*h->cmd_pool),
-		    &(h->cmd_pool_dhandle));
-	h->errinfo_pool = pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(*h->errinfo_pool),
-		    &(h->errinfo_pool_dhandle));
-	if ((h->cmd_pool_bits == NULL)
-	    || (h->cmd_pool == NULL)
-	    || (h->errinfo_pool == NULL)) {
-		dev_err(&pdev->dev, "out of memory");
-		rc = -ENOMEM;
+	if (hpsa_allocate_cmd_pool(h))
 		goto clean4;
-	}
 	if (hpsa_allocate_sg_chain_blocks(h))
 		goto clean4;
 	init_waitqueue_head(&h->scan_wait_queue);
 	h->scan_finished = 1; /* no scan currently in progress */
 
 	pci_set_drvdata(pdev, h);
-	memset(h->cmd_pool_bits, 0,
-	       ((h->nr_cmds + BITS_PER_LONG -
-		 1) / BITS_PER_LONG) * sizeof(unsigned long));
+	h->ndevices = 0;
+	h->scsi_host = NULL;
+	spin_lock_init(&h->devlock);
+	hpsa_put_ctlr_into_performant_mode(h);
+
+	/* At this point, the controller is ready to take commands.
+	 * Now, if reset_devices and the hard reset didn't work, try
+	 * the soft reset and see if that works.
+	 */
+	if (try_soft_reset) {
+
+		/* This is kind of gross.  We may or may not get a completion
+		 * from the soft reset command, and if we do, then the value
+		 * from the fifo may or may not be valid.  So, we wait 10 secs
+		 * after the reset throwing away any completions we get during
+		 * that time.  Unregister the interrupt handler and register
+		 * fake ones to scoop up any residual completions.
+		 */
+		spin_lock_irqsave(&h->lock, flags);
+		h->access.set_intr_mask(h, HPSA_INTR_OFF);
+		spin_unlock_irqrestore(&h->lock, flags);
+		free_irq(h->intr[h->intr_mode], h);
+		rc = hpsa_request_irq(h, hpsa_msix_discard_completions,
+					hpsa_intx_discard_completions);
+		if (rc) {
+			dev_warn(&h->pdev->dev, "Failed to request_irq after "
+				"soft reset.\n");
+			goto clean4;
+		}
+
+		rc = hpsa_kdump_soft_reset(h);
+		if (rc)
+			/* Neither hard nor soft reset worked, we're hosed. */
+			goto clean4;
+
+		dev_info(&h->pdev->dev, "Board READY.\n");
+		dev_info(&h->pdev->dev,
+			"Waiting for stale completions to drain.\n");
+		h->access.set_intr_mask(h, HPSA_INTR_ON);
+		msleep(10000);
+		h->access.set_intr_mask(h, HPSA_INTR_OFF);
+
+		rc = controller_reset_failed(h->cfgtable);
+		if (rc)
+			dev_info(&h->pdev->dev,
+				"Soft reset appears to have failed.\n");
+
+		/* since the controller's reset, we have to go back and re-init
+		 * everything.  Easiest to just forget what we've done and do it
+		 * all over again.
+		 */
+		hpsa_undo_allocations_after_kdump_soft_reset(h);
+		try_soft_reset = 0;
+		if (rc)
+			/* don't go to clean4, we already unallocated */
+			return -ENODEV;
 
-	hpsa_scsi_setup(h);
+		goto reinit_after_soft_reset;
+	}
 
 	/* Turn the interrupts on so we can service requests */
 	h->access.set_intr_mask(h, HPSA_INTR_ON);
 
-	hpsa_put_ctlr_into_performant_mode(h);
 	hpsa_hba_inquiry(h);
 	hpsa_register_scsi(h);	/* hook ourselves into SCSI subsystem */
 	h->busy_initializing = 0;
@@ -3902,16 +4213,7 @@ static int __devinit hpsa_init_one(struct pci_dev *pdev,
 
 clean4:
 	hpsa_free_sg_chain_blocks(h);
-	kfree(h->cmd_pool_bits);
-	if (h->cmd_pool)
-		pci_free_consistent(h->pdev,
-			    h->nr_cmds * sizeof(struct CommandList),
-			    h->cmd_pool, h->cmd_pool_dhandle);
-	if (h->errinfo_pool)
-		pci_free_consistent(h->pdev,
-			    h->nr_cmds * sizeof(struct ErrorInfo),
-			    h->errinfo_pool,
-			    h->errinfo_pool_dhandle);
+	hpsa_free_cmd_pool(h);
 	free_irq(h->intr[h->intr_mode], h);
 clean2:
 clean1:
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 621a1530054a..6d8dcd4dd06b 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -127,10 +127,12 @@ struct ctlr_info {
 };
 #define HPSA_ABORT_MSG 0
 #define HPSA_DEVICE_RESET_MSG 1
-#define HPSA_BUS_RESET_MSG 2
-#define HPSA_HOST_RESET_MSG 3
+#define HPSA_RESET_TYPE_CONTROLLER 0x00
+#define HPSA_RESET_TYPE_BUS 0x01
+#define HPSA_RESET_TYPE_TARGET 0x03
+#define HPSA_RESET_TYPE_LUN 0x04
 #define HPSA_MSG_SEND_RETRY_LIMIT 10
-#define HPSA_MSG_SEND_RETRY_INTERVAL_MSECS 1000
+#define HPSA_MSG_SEND_RETRY_INTERVAL_MSECS (10000)
 
 /* Maximum time in seconds driver will wait for command completions
  * when polling before giving up.
@@ -155,7 +157,7 @@ struct ctlr_info {
  * HPSA_BOARD_READY_ITERATIONS are derived from those.
  */
 #define HPSA_BOARD_READY_WAIT_SECS (120)
-#define HPSA_BOARD_NOT_READY_WAIT_SECS (10)
+#define HPSA_BOARD_NOT_READY_WAIT_SECS (100)
 #define HPSA_BOARD_READY_POLL_INTERVAL_MSECS (100)
 #define HPSA_BOARD_READY_POLL_INTERVAL \
 	((HPSA_BOARD_READY_POLL_INTERVAL_MSECS * HZ) / 1000)
@@ -212,6 +214,7 @@ static void SA5_submit_command(struct ctlr_info *h,
 	dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr,
 		c->Header.Tag.lower);
 	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+	(void) readl(h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	h->commands_outstanding++;
 	if (h->commands_outstanding > h->max_outstanding)
 		h->max_outstanding = h->commands_outstanding;
@@ -227,10 +230,12 @@ static void SA5_intr_mask(struct ctlr_info *h, unsigned long val)
 	if (val) { /* Turn interrupts on */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else { /* Turn them off */
 		h->interrupts_enabled = 0;
 		writel(SA5_INTR_OFF,
 			h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 
@@ -239,10 +244,12 @@ static void SA5_performant_intr_mask(struct ctlr_info *h, unsigned long val)
 	if (val) { /* turn on interrupts */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else {
 		h->interrupts_enabled = 0;
 		writel(SA5_PERF_INTR_OFF,
 			h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 18464900e761..55d741b019db 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -101,6 +101,7 @@
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
 #define DOORBELL_CTLR_RESET	0x00000004l
+#define DOORBELL_CTLR_RESET2	0x00000020l
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
@@ -256,14 +257,6 @@ struct ErrorInfo {
 #define CMD_IOCTL_PEND  0x01
 #define CMD_SCSI	0x03
 
-/* This structure needs to be divisible by 32 for new
- * indexing method and performant mode.
- */
-#define PAD32 32
-#define PAD64DIFF 0
-#define USEEXTRA ((sizeof(void *) - 4)/4)
-#define PADSIZE (PAD32 + PAD64DIFF * USEEXTRA)
-
 #define DIRECT_LOOKUP_SHIFT 5
 #define DIRECT_LOOKUP_BIT 0x10
 #define DIRECT_LOOKUP_MASK (~((1 << DIRECT_LOOKUP_SHIFT) - 1))
@@ -345,6 +338,8 @@ struct CfgTable {
 	u8		reserved[0x78 - 0x58];
 	u32		misc_fw_support; /* offset 0x78 */
 #define			MISC_FW_DOORBELL_RESET (0x02)
+#define			MISC_FW_DOORBELL_RESET2 (0x010)
+	u8		driver_version[32];
 };
 
 #define NUM_BLOCKFETCH_ENTRIES 8
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 041958453e2a..3d391dc3f11f 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1849,8 +1849,7 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
 		rc = ibmvscsi_ops->reset_crq_queue(&hostdata->queue, hostdata);
 		if (!rc)
 			rc = ibmvscsi_ops->send_crq(hostdata, 0xC001000000000000LL, 0);
-		if (!rc)
-			rc = vio_enable_interrupts(to_vio_dev(hostdata->dev));
+		vio_enable_interrupts(to_vio_dev(hostdata->dev));
 	} else if (hostdata->reenable_crq) {
 		smp_rmb();
 		action = "enable";
diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c
index 6568aab745a0..92109b126391 100644
--- a/drivers/scsi/in2000.c
+++ b/drivers/scsi/in2000.c
@@ -343,7 +343,7 @@ static int in2000_queuecommand_lck(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 	instance = cmd->device->host;
 	hostdata = (struct IN2000_hostdata *) instance->hostdata;
 
-	DB(DB_QUEUE_COMMAND, scmd_printk(KERN_DEBUG, cmd, "Q-%02x-%ld(", cmd->cmnd[0], cmd->serial_number))
+	DB(DB_QUEUE_COMMAND, scmd_printk(KERN_DEBUG, cmd, "Q-%02x(", cmd->cmnd[0]))
 
 /* Set up a few fields in the Scsi_Cmnd structure for our own use:
  *  - host_scribble is the pointer to the next cmd in the input queue
@@ -427,7 +427,7 @@ static int in2000_queuecommand_lck(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
 
 	in2000_execute(cmd->device->host);
 
-	DB(DB_QUEUE_COMMAND, printk(")Q-%ld ", cmd->serial_number))
+	DB(DB_QUEUE_COMMAND, printk(")Q "))
 	    return 0;
 }
 
@@ -705,7 +705,7 @@ static void in2000_execute(struct Scsi_Host *instance)
 	 * to search the input_Q again...
 	 */
 
-	DB(DB_EXECUTE, printk("%s%ld)EX-2 ", (cmd->SCp.phase) ? "d:" : "", cmd->serial_number))
+	DB(DB_EXECUTE, printk("%s)EX-2 ", (cmd->SCp.phase) ? "d:" : ""))
 
 }
 
@@ -1149,7 +1149,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 	case CSR_XFER_DONE | PHS_COMMAND:
 	case CSR_UNEXP | PHS_COMMAND:
 	case CSR_SRV_REQ | PHS_COMMAND:
-		DB(DB_INTR, printk("CMND-%02x,%ld", cmd->cmnd[0], cmd->serial_number))
+		DB(DB_INTR, printk("CMND-%02x", cmd->cmnd[0]))
 		    transfer_pio(cmd->cmnd, cmd->cmd_len, DATA_OUT_DIR, hostdata);
 		hostdata->state = S_CONNECTED;
 		break;
@@ -1191,7 +1191,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 		switch (msg) {
 
 		case COMMAND_COMPLETE:
-			DB(DB_INTR, printk("CCMP-%ld", cmd->serial_number))
+			DB(DB_INTR, printk("CCMP"))
 			    write_3393_cmd(hostdata, WD_CMD_NEGATE_ACK);
 			hostdata->state = S_PRE_CMP_DISC;
 			break;
@@ -1329,7 +1329,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 
 		write_3393(hostdata, WD_SOURCE_ID, SRCID_ER);
 		if (phs == 0x60) {
-			DB(DB_INTR, printk("SX-DONE-%ld", cmd->serial_number))
+			DB(DB_INTR, printk("SX-DONE"))
 			    cmd->SCp.Message = COMMAND_COMPLETE;
 			lun = read_3393(hostdata, WD_TARGET_LUN);
 			DB(DB_INTR, printk(":%d.%d", cmd->SCp.Status, lun))
@@ -1350,7 +1350,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 
 			in2000_execute(instance);
 		} else {
-			printk("%02x:%02x:%02x-%ld: Unknown SEL_XFER_DONE phase!!---", asr, sr, phs, cmd->serial_number);
+			printk("%02x:%02x:%02x: Unknown SEL_XFER_DONE phase!!---", asr, sr, phs);
 		}
 		break;
 
@@ -1417,7 +1417,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 			spin_unlock_irqrestore(instance->host_lock, flags);
 			return IRQ_HANDLED;
 		}
-		DB(DB_INTR, printk("UNEXP_DISC-%ld", cmd->serial_number))
+		DB(DB_INTR, printk("UNEXP_DISC"))
 		    hostdata->connected = NULL;
 		hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 		hostdata->state = S_UNCONNECTED;
@@ -1442,7 +1442,7 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
  */
 
 		write_3393(hostdata, WD_SOURCE_ID, SRCID_ER);
-		DB(DB_INTR, printk("DISC-%ld", cmd->serial_number))
+		DB(DB_INTR, printk("DISC"))
 		    if (cmd == NULL) {
 			printk(" - Already disconnected! ");
 			hostdata->state = S_UNCONNECTED;
@@ -1575,7 +1575,6 @@ static irqreturn_t in2000_intr(int irqnum, void *dev_id)
 		} else
 			hostdata->state = S_CONNECTED;
 
-		DB(DB_INTR, printk("-%ld", cmd->serial_number))
 		    break;
 
 	default:
@@ -1704,7 +1703,7 @@ static int __in2000_abort(Scsi_Cmnd * cmd)
 				prev->host_scribble = cmd->host_scribble;
 			cmd->host_scribble = NULL;
 			cmd->result = DID_ABORT << 16;
-			printk(KERN_WARNING "scsi%d: Abort - removing command %ld from input_Q. ", instance->host_no, cmd->serial_number);
+			printk(KERN_WARNING "scsi%d: Abort - removing command from input_Q. ", instance->host_no);
 			cmd->scsi_done(cmd);
 			return SUCCESS;
 		}
@@ -1725,7 +1724,7 @@ static int __in2000_abort(Scsi_Cmnd * cmd)
 
 	if (hostdata->connected == cmd) {
 
-		printk(KERN_WARNING "scsi%d: Aborting connected command %ld - ", instance->host_no, cmd->serial_number);
+		printk(KERN_WARNING "scsi%d: Aborting connected command - ", instance->host_no);
 
 		printk("sending wd33c93 ABORT command - ");
 		write_3393(hostdata, WD_CONTROL, CTRL_IDI | CTRL_EDI | CTRL_POLLED);
@@ -2270,7 +2269,7 @@ static int in2000_proc_info(struct Scsi_Host *instance, char *buf, char **start,
 		strcat(bp, "\nconnected:     ");
 		if (hd->connected) {
 			cmd = (Scsi_Cmnd *) hd->connected;
-			sprintf(tbuf, " %ld-%d:%d(%02x)", cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)", cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 		}
 	}
@@ -2278,7 +2277,7 @@ static int in2000_proc_info(struct Scsi_Host *instance, char *buf, char **start,
 		strcat(bp, "\ninput_Q:       ");
 		cmd = (Scsi_Cmnd *) hd->input_Q;
 		while (cmd) {
-			sprintf(tbuf, " %ld-%d:%d(%02x)", cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)", cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 			cmd = (Scsi_Cmnd *) cmd->host_scribble;
 		}
@@ -2287,7 +2286,7 @@ static int in2000_proc_info(struct Scsi_Host *instance, char *buf, char **start,
 		strcat(bp, "\ndisconnected_Q:");
 		cmd = (Scsi_Cmnd *) hd->disconnected_Q;
 		while (cmd) {
-			sprintf(tbuf, " %ld-%d:%d(%02x)", cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)", cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 			cmd = (Scsi_Cmnd *) cmd->host_scribble;
 		}
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 0621238fac4a..12868ca46110 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -60,6 +60,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/ioport.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
@@ -2717,13 +2718,18 @@ static int ipr_sdt_copy(struct ipr_ioa_cfg *ioa_cfg,
 			unsigned long pci_address, u32 length)
 {
 	int bytes_copied = 0;
-	int cur_len, rc, rem_len, rem_page_len;
+	int cur_len, rc, rem_len, rem_page_len, max_dump_size;
 	__be32 *page;
 	unsigned long lock_flags = 0;
 	struct ipr_ioa_dump *ioa_dump = &ioa_cfg->dump->ioa_dump;
 
+	if (ioa_cfg->sis64)
+		max_dump_size = IPR_FMT3_MAX_IOA_DUMP_SIZE;
+	else
+		max_dump_size = IPR_FMT2_MAX_IOA_DUMP_SIZE;
+
 	while (bytes_copied < length &&
-	       (ioa_dump->hdr.len + bytes_copied) < IPR_MAX_IOA_DUMP_SIZE) {
+	       (ioa_dump->hdr.len + bytes_copied) < max_dump_size) {
 		if (ioa_dump->page_offset >= PAGE_SIZE ||
 		    ioa_dump->page_offset == 0) {
 			page = (__be32 *)__get_free_page(GFP_ATOMIC);
@@ -2885,8 +2891,8 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump)
 	unsigned long lock_flags = 0;
 	struct ipr_driver_dump *driver_dump = &dump->driver_dump;
 	struct ipr_ioa_dump *ioa_dump = &dump->ioa_dump;
-	u32 num_entries, start_off, end_off;
-	u32 bytes_to_copy, bytes_copied, rc;
+	u32 num_entries, max_num_entries, start_off, end_off;
+	u32 max_dump_size, bytes_to_copy, bytes_copied, rc;
 	struct ipr_sdt *sdt;
 	int valid = 1;
 	int i;
@@ -2947,8 +2953,18 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump)
 	 on entries in this table */
 	sdt = &ioa_dump->sdt;
 
+	if (ioa_cfg->sis64) {
+		max_num_entries = IPR_FMT3_NUM_SDT_ENTRIES;
+		max_dump_size = IPR_FMT3_MAX_IOA_DUMP_SIZE;
+	} else {
+		max_num_entries = IPR_FMT2_NUM_SDT_ENTRIES;
+		max_dump_size = IPR_FMT2_MAX_IOA_DUMP_SIZE;
+	}
+
+	bytes_to_copy = offsetof(struct ipr_sdt, entry) +
+			(max_num_entries * sizeof(struct ipr_sdt_entry));
 	rc = ipr_get_ldump_data_section(ioa_cfg, start_addr, (__be32 *)sdt,
-					sizeof(struct ipr_sdt) / sizeof(__be32));
+					bytes_to_copy / sizeof(__be32));
 
 	/* Smart Dump table is ready to use and the first entry is valid */
 	if (rc || ((be32_to_cpu(sdt->hdr.state) != IPR_FMT3_SDT_READY_TO_USE) &&
@@ -2964,13 +2980,20 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump)
 
 	num_entries = be32_to_cpu(sdt->hdr.num_entries_used);
 
-	if (num_entries > IPR_NUM_SDT_ENTRIES)
-		num_entries = IPR_NUM_SDT_ENTRIES;
+	if (num_entries > max_num_entries)
+		num_entries = max_num_entries;
+
+	/* Update dump length to the actual data to be copied */
+	dump->driver_dump.hdr.len += sizeof(struct ipr_sdt_header);
+	if (ioa_cfg->sis64)
+		dump->driver_dump.hdr.len += num_entries * sizeof(struct ipr_sdt_entry);
+	else
+		dump->driver_dump.hdr.len += max_num_entries * sizeof(struct ipr_sdt_entry);
 
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 
 	for (i = 0; i < num_entries; i++) {
-		if (ioa_dump->hdr.len > IPR_MAX_IOA_DUMP_SIZE) {
+		if (ioa_dump->hdr.len > max_dump_size) {
 			driver_dump->hdr.status = IPR_DUMP_STATUS_QUAL_SUCCESS;
 			break;
 		}
@@ -2989,7 +3012,7 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump)
 					valid = 0;
 			}
 			if (valid) {
-				if (bytes_to_copy > IPR_MAX_IOA_DUMP_SIZE) {
+				if (bytes_to_copy > max_dump_size) {
 					sdt->entry[i].flags &= ~IPR_SDT_VALID_ENTRY;
 					continue;
 				}
@@ -3044,6 +3067,7 @@ static void ipr_release_dump(struct kref *kref)
 	for (i = 0; i < dump->ioa_dump.next_page_index; i++)
 		free_page((unsigned long) dump->ioa_dump.ioa_data[i]);
 
+	vfree(dump->ioa_dump.ioa_data);
 	kfree(dump);
 	LEAVE;
 }
@@ -3835,7 +3859,7 @@ static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 	struct ipr_dump *dump;
 	unsigned long lock_flags = 0;
 	char *src;
-	int len;
+	int len, sdt_end;
 	size_t rc = count;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -3875,9 +3899,17 @@ static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 
 	off -= sizeof(dump->driver_dump);
 
-	if (count && off < offsetof(struct ipr_ioa_dump, ioa_data)) {
-		if (off + count > offsetof(struct ipr_ioa_dump, ioa_data))
-			len = offsetof(struct ipr_ioa_dump, ioa_data) - off;
+	if (ioa_cfg->sis64)
+		sdt_end = offsetof(struct ipr_ioa_dump, sdt.entry) +
+			  (be32_to_cpu(dump->ioa_dump.sdt.hdr.num_entries_used) *
+			   sizeof(struct ipr_sdt_entry));
+	else
+		sdt_end = offsetof(struct ipr_ioa_dump, sdt.entry) +
+			  (IPR_FMT2_NUM_SDT_ENTRIES * sizeof(struct ipr_sdt_entry));
+
+	if (count && off < sdt_end) {
+		if (off + count > sdt_end)
+			len = sdt_end - off;
 		else
 			len = count;
 		src = (u8 *)&dump->ioa_dump + off;
@@ -3887,7 +3919,7 @@ static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 		count -= len;
 	}
 
-	off -= offsetof(struct ipr_ioa_dump, ioa_data);
+	off -= sdt_end;
 
 	while (count) {
 		if ((off & PAGE_MASK) != ((off + count) & PAGE_MASK))
@@ -3916,6 +3948,7 @@ static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
 {
 	struct ipr_dump *dump;
+	__be32 **ioa_data;
 	unsigned long lock_flags = 0;
 
 	dump = kzalloc(sizeof(struct ipr_dump), GFP_KERNEL);
@@ -3925,6 +3958,19 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
 		return -ENOMEM;
 	}
 
+	if (ioa_cfg->sis64)
+		ioa_data = vmalloc(IPR_FMT3_MAX_NUM_DUMP_PAGES * sizeof(__be32 *));
+	else
+		ioa_data = vmalloc(IPR_FMT2_MAX_NUM_DUMP_PAGES * sizeof(__be32 *));
+
+	if (!ioa_data) {
+		ipr_err("Dump memory allocation failed\n");
+		kfree(dump);
+		return -ENOMEM;
+	}
+
+	dump->ioa_dump.ioa_data = ioa_data;
+
 	kref_init(&dump->kref);
 	dump->ioa_cfg = ioa_cfg;
 
@@ -3932,6 +3978,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
 
 	if (INACTIVE != ioa_cfg->sdt_state) {
 		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+		vfree(dump->ioa_dump.ioa_data);
 		kfree(dump);
 		return 0;
 	}
@@ -4953,9 +5000,35 @@ static int ipr_eh_abort(struct scsi_cmnd * scsi_cmd)
  * 	IRQ_NONE / IRQ_HANDLED
  **/
 static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
-					      volatile u32 int_reg)
+					      u32 int_reg)
 {
 	irqreturn_t rc = IRQ_HANDLED;
+	u32 int_mask_reg;
+
+	int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg32);
+	int_reg &= ~int_mask_reg;
+
+	/* If an interrupt on the adapter did not occur, ignore it.
+	 * Or in the case of SIS 64, check for a stage change interrupt.
+	 */
+	if ((int_reg & IPR_PCII_OPER_INTERRUPTS) == 0) {
+		if (ioa_cfg->sis64) {
+			int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg);
+			int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
+			if (int_reg & IPR_PCII_IPL_STAGE_CHANGE) {
+
+				/* clear stage change */
+				writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_reg);
+				int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
+				list_del(&ioa_cfg->reset_cmd->queue);
+				del_timer(&ioa_cfg->reset_cmd->timer);
+				ipr_reset_ioa_job(ioa_cfg->reset_cmd);
+				return IRQ_HANDLED;
+			}
+		}
+
+		return IRQ_NONE;
+	}
 
 	if (int_reg & IPR_PCII_IOA_TRANS_TO_OPER) {
 		/* Mask the interrupt */
@@ -4968,6 +5041,13 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
 		list_del(&ioa_cfg->reset_cmd->queue);
 		del_timer(&ioa_cfg->reset_cmd->timer);
 		ipr_reset_ioa_job(ioa_cfg->reset_cmd);
+	} else if ((int_reg & IPR_PCII_HRRQ_UPDATED) == int_reg) {
+		if (ipr_debug && printk_ratelimit())
+			dev_err(&ioa_cfg->pdev->dev,
+				"Spurious interrupt detected. 0x%08X\n", int_reg);
+		writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32);
+		int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
+		return IRQ_NONE;
 	} else {
 		if (int_reg & IPR_PCII_IOA_UNIT_CHECKED)
 			ioa_cfg->ioa_unit_checked = 1;
@@ -5016,10 +5096,11 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 {
 	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp;
 	unsigned long lock_flags = 0;
-	volatile u32 int_reg, int_mask_reg;
+	u32 int_reg = 0;
 	u32 ioasc;
 	u16 cmd_index;
 	int num_hrrq = 0;
+	int irq_none = 0;
 	struct ipr_cmnd *ipr_cmd;
 	irqreturn_t rc = IRQ_NONE;
 
@@ -5031,33 +5112,6 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 		return IRQ_NONE;
 	}
 
-	int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg32);
-	int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32) & ~int_mask_reg;
-
-	/* If an interrupt on the adapter did not occur, ignore it.
-	 * Or in the case of SIS 64, check for a stage change interrupt.
-	 */
-	if (unlikely((int_reg & IPR_PCII_OPER_INTERRUPTS) == 0)) {
-		if (ioa_cfg->sis64) {
-			int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg);
-			int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
-			if (int_reg & IPR_PCII_IPL_STAGE_CHANGE) {
-
-				/* clear stage change */
-				writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_reg);
-				int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
-				list_del(&ioa_cfg->reset_cmd->queue);
-				del_timer(&ioa_cfg->reset_cmd->timer);
-				ipr_reset_ioa_job(ioa_cfg->reset_cmd);
-				spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-				return IRQ_HANDLED;
-			}
-		}
-
-		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-		return IRQ_NONE;
-	}
-
 	while (1) {
 		ipr_cmd = NULL;
 
@@ -5097,7 +5151,7 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 			/* Clear the PCI interrupt */
 			do {
 				writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32);
-				int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32) & ~int_mask_reg;
+				int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
 			} while (int_reg & IPR_PCII_HRRQ_UPDATED &&
 					num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
 
@@ -5107,6 +5161,9 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 				return IRQ_HANDLED;
 			}
 
+		} else if (rc == IRQ_NONE && irq_none == 0) {
+			int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
+			irq_none++;
 		} else
 			break;
 	}
@@ -5143,7 +5200,8 @@ static int ipr_build_ioadl64(struct ipr_ioa_cfg *ioa_cfg,
 
 	nseg = scsi_dma_map(scsi_cmd);
 	if (nseg < 0) {
-		dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n");
+		if (printk_ratelimit())
+			dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n");
 		return -1;
 	}
 
@@ -5773,7 +5831,8 @@ static int ipr_queuecommand_lck(struct scsi_cmnd *scsi_cmd,
 		}
 
 		ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_LINK_DESC;
-		ioarcb->cmd_pkt.flags_lo |= IPR_FLAGS_LO_DELAY_AFTER_RST;
+		if (ipr_is_gscsi(res))
+			ioarcb->cmd_pkt.flags_lo |= IPR_FLAGS_LO_DELAY_AFTER_RST;
 		ioarcb->cmd_pkt.flags_lo |= IPR_FLAGS_LO_ALIGNED_BFR;
 		ioarcb->cmd_pkt.flags_lo |= ipr_get_task_attributes(scsi_cmd);
 	}
@@ -7516,7 +7575,7 @@ static int ipr_reset_get_unit_check_job(struct ipr_cmnd *ipr_cmd)
 static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
 {
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-	volatile u32 int_reg;
+	u32 int_reg;
 
 	ENTER;
 	ioa_cfg->pdev->state_saved = true;
@@ -7555,7 +7614,10 @@ static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
 		ipr_cmd->job_step = ipr_reset_enable_ioa;
 
 		if (GET_DUMP == ioa_cfg->sdt_state) {
-			ipr_reset_start_timer(ipr_cmd, IPR_DUMP_TIMEOUT);
+			if (ioa_cfg->sis64)
+				ipr_reset_start_timer(ipr_cmd, IPR_SIS64_DUMP_TIMEOUT);
+			else
+				ipr_reset_start_timer(ipr_cmd, IPR_SIS32_DUMP_TIMEOUT);
 			ipr_cmd->job_step = ipr_reset_wait_for_dump;
 			schedule_work(&ioa_cfg->work_q);
 			return IPR_RC_JOB_RETURN;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 13f425fb8851..f93f8637c5a1 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -38,8 +38,8 @@
 /*
  * Literals
  */
-#define IPR_DRIVER_VERSION "2.5.1"
-#define IPR_DRIVER_DATE "(August 10, 2010)"
+#define IPR_DRIVER_VERSION "2.5.2"
+#define IPR_DRIVER_DATE "(April 27, 2011)"
 
 /*
  * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding
@@ -217,7 +217,8 @@
 #define IPR_CHECK_FOR_RESET_TIMEOUT		(HZ / 10)
 #define IPR_WAIT_FOR_BIST_TIMEOUT		(2 * HZ)
 #define IPR_PCI_RESET_TIMEOUT			(HZ / 2)
-#define IPR_DUMP_TIMEOUT			(15 * HZ)
+#define IPR_SIS32_DUMP_TIMEOUT			(15 * HZ)
+#define IPR_SIS64_DUMP_TIMEOUT			(40 * HZ)
 #define IPR_DUMP_DELAY_SECONDS			4
 #define IPR_DUMP_DELAY_TIMEOUT			(IPR_DUMP_DELAY_SECONDS * HZ)
 
@@ -285,9 +286,12 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR)
 /*
  * Dump literals
  */
-#define IPR_MAX_IOA_DUMP_SIZE				(4 * 1024 * 1024)
-#define IPR_NUM_SDT_ENTRIES				511
-#define IPR_MAX_NUM_DUMP_PAGES	((IPR_MAX_IOA_DUMP_SIZE / PAGE_SIZE) + 1)
+#define IPR_FMT2_MAX_IOA_DUMP_SIZE			(4 * 1024 * 1024)
+#define IPR_FMT3_MAX_IOA_DUMP_SIZE			(32 * 1024 * 1024)
+#define IPR_FMT2_NUM_SDT_ENTRIES			511
+#define IPR_FMT3_NUM_SDT_ENTRIES			0xFFF
+#define IPR_FMT2_MAX_NUM_DUMP_PAGES	((IPR_FMT2_MAX_IOA_DUMP_SIZE / PAGE_SIZE) + 1)
+#define IPR_FMT3_MAX_NUM_DUMP_PAGES	((IPR_FMT3_MAX_IOA_DUMP_SIZE / PAGE_SIZE) + 1)
 
 /*
  * Misc literals
@@ -474,7 +478,7 @@ struct ipr_cmd_pkt {
 
 	u8 flags_lo;
 #define IPR_FLAGS_LO_ALIGNED_BFR		0x20
-#define IPR_FLAGS_LO_DELAY_AFTER_RST	0x10
+#define IPR_FLAGS_LO_DELAY_AFTER_RST		0x10
 #define IPR_FLAGS_LO_UNTAGGED_TASK		0x00
 #define IPR_FLAGS_LO_SIMPLE_TASK		0x02
 #define IPR_FLAGS_LO_ORDERED_TASK		0x04
@@ -1164,7 +1168,7 @@ struct ipr_sdt_header {
 
 struct ipr_sdt {
 	struct ipr_sdt_header hdr;
-	struct ipr_sdt_entry entry[IPR_NUM_SDT_ENTRIES];
+	struct ipr_sdt_entry entry[IPR_FMT3_NUM_SDT_ENTRIES];
 }__attribute__((packed, aligned (4)));
 
 struct ipr_uc_sdt {
@@ -1608,7 +1612,7 @@ struct ipr_driver_dump {
 struct ipr_ioa_dump {
 	struct ipr_dump_entry_header hdr;
 	struct ipr_sdt sdt;
-	__be32 *ioa_data[IPR_MAX_NUM_DUMP_PAGES];
+	__be32 **ioa_data;
 	u32 reserved;
 	u32 next_page_index;
 	u32 page_offset;
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 5b799a37ad09..2a3a4720a771 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -57,9 +57,6 @@ static struct kmem_cache *scsi_pkt_cachep;
 #define FC_SRB_READ		(1 << 1)
 #define FC_SRB_WRITE		(1 << 0)
 
-/* constant added to e_d_tov timeout to get rec_tov value */
-#define REC_TOV_CONST		1
-
 /*
  * The SCp.ptr should be tested and set under the scsi_pkt_queue lock
  */
@@ -248,7 +245,7 @@ static inline void fc_fcp_unlock_pkt(struct fc_fcp_pkt *fsp)
 /**
  * fc_fcp_timer_set() - Start a timer for a fcp_pkt
  * @fsp:   The FCP packet to start a timer for
- * @delay: The timeout period for the timer
+ * @delay: The timeout period in jiffies
  */
 static void fc_fcp_timer_set(struct fc_fcp_pkt *fsp, unsigned long delay)
 {
@@ -335,22 +332,23 @@ static void fc_fcp_ddp_done(struct fc_fcp_pkt *fsp)
 /**
  * fc_fcp_can_queue_ramp_up() - increases can_queue
  * @lport: lport to ramp up can_queue
- *
- * Locking notes: Called with Scsi_Host lock held
  */
 static void fc_fcp_can_queue_ramp_up(struct fc_lport *lport)
 {
 	struct fc_fcp_internal *si = fc_get_scsi_internal(lport);
+	unsigned long flags;
 	int can_queue;
 
+	spin_lock_irqsave(lport->host->host_lock, flags);
+
 	if (si->last_can_queue_ramp_up_time &&
 	    (time_before(jiffies, si->last_can_queue_ramp_up_time +
 			 FC_CAN_QUEUE_PERIOD)))
-		return;
+		goto unlock;
 
 	if (time_before(jiffies, si->last_can_queue_ramp_down_time +
 			FC_CAN_QUEUE_PERIOD))
-		return;
+		goto unlock;
 
 	si->last_can_queue_ramp_up_time = jiffies;
 
@@ -362,6 +360,9 @@ static void fc_fcp_can_queue_ramp_up(struct fc_lport *lport)
 	lport->host->can_queue = can_queue;
 	shost_printk(KERN_ERR, lport->host, "libfc: increased "
 		     "can_queue to %d.\n", can_queue);
+
+unlock:
+	spin_unlock_irqrestore(lport->host->host_lock, flags);
 }
 
 /**
@@ -373,18 +374,19 @@ static void fc_fcp_can_queue_ramp_up(struct fc_lport *lport)
  * commands complete or timeout, then try again with a reduced
  * can_queue. Eventually we will hit the point where we run
  * on all reserved structs.
- *
- * Locking notes: Called with Scsi_Host lock held
  */
 static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 {
 	struct fc_fcp_internal *si = fc_get_scsi_internal(lport);
+	unsigned long flags;
 	int can_queue;
 
+	spin_lock_irqsave(lport->host->host_lock, flags);
+
 	if (si->last_can_queue_ramp_down_time &&
 	    (time_before(jiffies, si->last_can_queue_ramp_down_time +
 			 FC_CAN_QUEUE_PERIOD)))
-		return;
+		goto unlock;
 
 	si->last_can_queue_ramp_down_time = jiffies;
 
@@ -395,6 +397,9 @@ static void fc_fcp_can_queue_ramp_down(struct fc_lport *lport)
 	lport->host->can_queue = can_queue;
 	shost_printk(KERN_ERR, lport->host, "libfc: Could not allocate frame.\n"
 		     "Reducing can_queue to %d.\n", can_queue);
+
+unlock:
+	spin_unlock_irqrestore(lport->host->host_lock, flags);
 }
 
 /*
@@ -409,16 +414,13 @@ static inline struct fc_frame *fc_fcp_frame_alloc(struct fc_lport *lport,
 						  size_t len)
 {
 	struct fc_frame *fp;
-	unsigned long flags;
 
 	fp = fc_frame_alloc(lport, len);
 	if (likely(fp))
 		return fp;
 
 	/* error case */
-	spin_lock_irqsave(lport->host->host_lock, flags);
 	fc_fcp_can_queue_ramp_down(lport);
-	spin_unlock_irqrestore(lport->host->host_lock, flags);
 	return NULL;
 }
 
@@ -1093,16 +1095,14 @@ static int fc_fcp_pkt_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp)
 /**
  * get_fsp_rec_tov() - Helper function to get REC_TOV
  * @fsp: the FCP packet
+ *
+ * Returns rec_tov in jiffies: rpriv->e_d_tov (msecs) plus 1 second
  */
 static inline unsigned int get_fsp_rec_tov(struct fc_fcp_pkt *fsp)
 {
-	struct fc_rport *rport;
-	struct fc_rport_libfc_priv *rpriv;
-
-	rport = fsp->rport;
-	rpriv = rport->dd_data;
+	struct fc_rport_libfc_priv *rpriv = fsp->rport->dd_data;
 
-	return rpriv->e_d_tov + REC_TOV_CONST;
+	return msecs_to_jiffies(rpriv->e_d_tov) + HZ;
 }
 
 /**
@@ -1122,7 +1122,6 @@ static int fc_fcp_cmd_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp,
 	struct fc_rport_libfc_priv *rpriv;
 	const size_t len = sizeof(fsp->cdb_cmd);
 	int rc = 0;
-	unsigned int rec_tov;
 
 	if (fc_fcp_lock_pkt(fsp))
 		return 0;
@@ -1153,12 +1152,9 @@ static int fc_fcp_cmd_send(struct fc_lport *lport, struct fc_fcp_pkt *fsp,
 	fsp->seq_ptr = seq;
 	fc_fcp_pkt_hold(fsp);	/* hold for fc_fcp_pkt_destroy */
 
-	rec_tov = get_fsp_rec_tov(fsp);
-
 	setup_timer(&fsp->timer, fc_fcp_timeout, (unsigned long)fsp);
-
 	if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED)
-		fc_fcp_timer_set(fsp, rec_tov);
+		fc_fcp_timer_set(fsp, get_fsp_rec_tov(fsp));
 
 unlock:
 	fc_fcp_unlock_pkt(fsp);
@@ -1235,16 +1231,14 @@ static void fc_lun_reset_send(unsigned long data)
 {
 	struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data;
 	struct fc_lport *lport = fsp->lp;
-	unsigned int rec_tov;
 
 	if (lport->tt.fcp_cmd_send(lport, fsp, fc_tm_done)) {
 		if (fsp->recov_retry++ >= FC_MAX_RECOV_RETRY)
 			return;
 		if (fc_fcp_lock_pkt(fsp))
 			return;
-		rec_tov = get_fsp_rec_tov(fsp);
 		setup_timer(&fsp->timer, fc_lun_reset_send, (unsigned long)fsp);
-		fc_fcp_timer_set(fsp, rec_tov);
+		fc_fcp_timer_set(fsp, get_fsp_rec_tov(fsp));
 		fc_fcp_unlock_pkt(fsp);
 	}
 }
@@ -1536,12 +1530,11 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 			}
 			fc_fcp_srr(fsp, r_ctl, offset);
 		} else if (e_stat & ESB_ST_SEQ_INIT) {
-			unsigned int rec_tov = get_fsp_rec_tov(fsp);
 			/*
 			 * The remote port has the initiative, so just
 			 * keep waiting for it to complete.
 			 */
-			fc_fcp_timer_set(fsp, rec_tov);
+			fc_fcp_timer_set(fsp,  get_fsp_rec_tov(fsp));
 		} else {
 
 			/*
@@ -1705,7 +1698,6 @@ static void fc_fcp_srr_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 {
 	struct fc_fcp_pkt *fsp = arg;
 	struct fc_frame_header *fh;
-	unsigned int rec_tov;
 
 	if (IS_ERR(fp)) {
 		fc_fcp_srr_error(fsp, fp);
@@ -1732,8 +1724,7 @@ static void fc_fcp_srr_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 	switch (fc_frame_payload_op(fp)) {
 	case ELS_LS_ACC:
 		fsp->recov_retry = 0;
-		rec_tov = get_fsp_rec_tov(fsp);
-		fc_fcp_timer_set(fsp, rec_tov);
+		fc_fcp_timer_set(fsp, get_fsp_rec_tov(fsp));
 		break;
 	case ELS_LS_RJT:
 	default:
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 906bbcad0e2d..389ab80aef0a 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1590,7 +1590,6 @@ void fc_lport_enter_flogi(struct fc_lport *lport)
  */
 int fc_lport_config(struct fc_lport *lport)
 {
-	INIT_LIST_HEAD(&lport->ema_list);
 	INIT_DELAYED_WORK(&lport->retry_work, fc_lport_timeout);
 	mutex_init(&lport->lp_mutex);
 
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 60e98a62f308..02d53d89534f 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -805,6 +805,8 @@ struct lpfc_hba {
 	struct dentry *idiag_root;
 	struct dentry *idiag_pci_cfg;
 	struct dentry *idiag_que_info;
+	struct dentry *idiag_que_acc;
+	struct dentry *idiag_drb_acc;
 #endif
 
 	/* Used for deferred freeing of ELS data buffers */
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 77b2871d96b7..37e2a1272f86 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2426,6 +2426,7 @@ lpfc_bsg_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 {
 	struct bsg_job_data *dd_data;
 	struct fc_bsg_job *job;
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
 	uint32_t size;
 	unsigned long flags;
 	uint8_t *to;
@@ -2469,9 +2470,8 @@ lpfc_bsg_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 			memcpy(to, from, size);
 		} else if ((phba->sli_rev == LPFC_SLI_REV4) &&
 			(pmboxq->u.mb.mbxCommand == MBX_SLI4_CONFIG)) {
-			struct lpfc_mbx_nembed_cmd *nembed_sge =
-				(struct lpfc_mbx_nembed_cmd *)
-				&pmboxq->u.mb.un.varWords[0];
+			nembed_sge = (struct lpfc_mbx_nembed_cmd *)
+					&pmboxq->u.mb.un.varWords[0];
 
 			from = (uint8_t *)dd_data->context_un.mbox.dmp->dma.
 						virt;
@@ -2496,16 +2496,18 @@ lpfc_bsg_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 					job->reply_payload.sg_cnt,
 					from, size);
 		job->reply->result = 0;
-
+		/* need to hold the lock until we set job->dd_data to NULL
+		 * to hold off the timeout handler returning to the mid-layer
+		 * while we are still processing the job.
+		 */
 		job->dd_data = NULL;
+		dd_data->context_un.mbox.set_job = NULL;
+		spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 		job->job_done(job);
+	} else {
+		dd_data->context_un.mbox.set_job = NULL;
+		spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 	}
-	dd_data->context_un.mbox.set_job = NULL;
-	/* need to hold the lock until we call job done to hold off
-	 * the timeout handler returning to the midlayer while
-	 * we are stillprocessing the job
-	 */
-	spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 
 	kfree(dd_data->context_un.mbox.mb);
 	mempool_free(dd_data->context_un.mbox.pmboxq, phba->mbox_mem_pool);
@@ -2644,6 +2646,11 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 	struct ulp_bde64 *rxbpl = NULL;
 	struct dfc_mbox_req *mbox_req = (struct dfc_mbox_req *)
 		job->request->rqst_data.h_vendor.vendor_cmd;
+	struct READ_EVENT_LOG_VAR *rdEventLog;
+	uint32_t transmit_length, receive_length, mode;
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
+	struct mbox_header *header;
+	struct ulp_bde64 *bde;
 	uint8_t *ext = NULL;
 	int rc = 0;
 	uint8_t *from;
@@ -2651,9 +2658,16 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 	/* in case no data is transferred */
 	job->reply->reply_payload_rcv_len = 0;
 
+	/* sanity check to protect driver */
+	if (job->reply_payload.payload_len > BSG_MBOX_SIZE ||
+	    job->request_payload.payload_len > BSG_MBOX_SIZE) {
+		rc = -ERANGE;
+		goto job_done;
+	}
+
 	/* check if requested extended data lengths are valid */
-	if ((mbox_req->inExtWLen > MAILBOX_EXT_SIZE) ||
-		(mbox_req->outExtWLen > MAILBOX_EXT_SIZE)) {
+	if ((mbox_req->inExtWLen > BSG_MBOX_SIZE/sizeof(uint32_t)) ||
+	    (mbox_req->outExtWLen > BSG_MBOX_SIZE/sizeof(uint32_t))) {
 		rc = -ERANGE;
 		goto job_done;
 	}
@@ -2744,8 +2758,8 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 	 * use ours
 	 */
 	if (pmb->mbxCommand == MBX_RUN_BIU_DIAG64) {
-		uint32_t transmit_length = pmb->un.varWords[1];
-		uint32_t receive_length = pmb->un.varWords[4];
+		transmit_length = pmb->un.varWords[1];
+		receive_length = pmb->un.varWords[4];
 		/* transmit length cannot be greater than receive length or
 		 * mailbox extension size
 		 */
@@ -2795,10 +2809,9 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 		from += sizeof(MAILBOX_t);
 		memcpy((uint8_t *)dmp->dma.virt, from, transmit_length);
 	} else if (pmb->mbxCommand == MBX_READ_EVENT_LOG) {
-		struct READ_EVENT_LOG_VAR *rdEventLog =
-			&pmb->un.varRdEventLog ;
-		uint32_t receive_length = rdEventLog->rcv_bde64.tus.f.bdeSize;
-		uint32_t mode =	 bf_get(lpfc_event_log, rdEventLog);
+		rdEventLog = &pmb->un.varRdEventLog;
+		receive_length = rdEventLog->rcv_bde64.tus.f.bdeSize;
+		mode = bf_get(lpfc_event_log, rdEventLog);
 
 		/* receive length cannot be greater than mailbox
 		 * extension size
@@ -2843,7 +2856,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 			/* rebuild the command for sli4 using our own buffers
 			* like we do for biu diags
 			*/
-			uint32_t receive_length = pmb->un.varWords[2];
+			receive_length = pmb->un.varWords[2];
 			/* receive length cannot be greater than mailbox
 			 * extension size
 			 */
@@ -2879,8 +2892,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 			pmb->un.varWords[4] = putPaddrHigh(dmp->dma.phys);
 		} else if ((pmb->mbxCommand == MBX_UPDATE_CFG) &&
 			pmb->un.varUpdateCfg.co) {
-			struct ulp_bde64 *bde =
-				(struct ulp_bde64 *)&pmb->un.varWords[4];
+			bde = (struct ulp_bde64 *)&pmb->un.varWords[4];
 
 			/* bde size cannot be greater than mailbox ext size */
 			if (bde->tus.f.bdeSize > MAILBOX_EXT_SIZE) {
@@ -2921,10 +2933,6 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct fc_bsg_job *job,
 			memcpy((uint8_t *)dmp->dma.virt, from,
 				bde->tus.f.bdeSize);
 		} else if (pmb->mbxCommand == MBX_SLI4_CONFIG) {
-			struct lpfc_mbx_nembed_cmd *nembed_sge;
-			struct mbox_header *header;
-			uint32_t receive_length;
-
 			/* rebuild the command for sli4 using our own buffers
 			* like we do for biu diags
 			*/
@@ -3386,6 +3394,7 @@ no_dd_data:
 	job->dd_data = NULL;
 	return rc;
 }
+
 /**
  * lpfc_bsg_hst_vendor - process a vendor-specific fc_bsg_job
  * @job: fc_bsg_job to handle
diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h
index a2c33e7c9152..b542aca6f5ae 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.h
+++ b/drivers/scsi/lpfc/lpfc_bsg.h
@@ -109,3 +109,133 @@ struct menlo_response {
 	uint32_t xri; /* return the xri of the iocb exchange */
 };
 
+/*
+ * Macros and data structures for handling sli-config mailbox command
+ * pass-through support. This header file is shared between user and
+ * kernel space. Note that this set of macros duplicates those in
+ * lpfc_hw4.h, with macro names prefixed with bsg_, because the macros
+ * defined in lpfc_hw4.h are not accessible from user space.
+ */
+
+/* Macros to deal with bit fields. Each bit field must have 3 #defines
+ * associated with it (_SHIFT, _MASK, and _WORD).
+ * E.g., for a bit field that starts at bit 7 of the "field4" field of a
+ * structure and is 2 bits in size, the following #defines must exist:
+ *      struct temp {
+ *              uint32_t        field1;
+ *              uint32_t        field2;
+ *              uint32_t        field3;
+ *              uint32_t        field4;
+ *      #define example_bit_field_SHIFT         7
+ *      #define example_bit_field_MASK          0x03
+ *      #define example_bit_field_WORD          field4
+ *              uint32_t        field5;
+ *      };
+ * Then the macros below may be used to get or set the value of that field.
+ * EG. To get the value of the bit field from the above example:
+ *      struct temp t1;
+ *      value = bsg_bf_get(example_bit_field, &t1);
+ * And then to set that bit field:
+ *      bsg_bf_set(example_bit_field, &t1, 2);
+ * Or clear that bit field:
+ *      bsg_bf_set(example_bit_field, &t1, 0);
+ */
+#define bsg_bf_get_le32(name, ptr) \
+	((le32_to_cpu((ptr)->name##_WORD) >> name##_SHIFT) & name##_MASK)
+#define bsg_bf_get(name, ptr) \
+	(((ptr)->name##_WORD >> name##_SHIFT) & name##_MASK)
+#define bsg_bf_set_le32(name, ptr, value) \
+	((ptr)->name##_WORD = cpu_to_le32(((((value) & \
+	name##_MASK) << name##_SHIFT) | (le32_to_cpu((ptr)->name##_WORD) & \
+	~(name##_MASK << name##_SHIFT)))))
+#define bsg_bf_set(name, ptr, value) \
+	((ptr)->name##_WORD = ((((value) & name##_MASK) << name##_SHIFT) | \
+	((ptr)->name##_WORD & ~(name##_MASK << name##_SHIFT))))
+
+/*
+ * The sli_config structure specified here is based on the following
+ * restrictions:
+ *
+ * -- SLI_CONFIG EMB=0, carrying MSEs, will carry subcommands without
+ *    carrying HBD.
+ * -- SLI_CONFIG EMB=1, not carrying MSE, will carry subcommands with or
+ *    without carrying HBDs.
+ */
+
+struct lpfc_sli_config_mse {
+	uint32_t pa_lo;
+	uint32_t pa_hi;
+	uint32_t buf_len;
+#define lpfc_mbox_sli_config_mse_len_SHIFT	0
+#define lpfc_mbox_sli_config_mse_len_MASK	0xffffff
+#define lpfc_mbox_sli_config_mse_len_WORD	buf_len
+};
+
+struct lpfc_sli_config_subcmd_hbd {
+	uint32_t buf_len;
+#define lpfc_mbox_sli_config_ecmn_hbd_len_SHIFT	0
+#define lpfc_mbox_sli_config_ecmn_hbd_len_MASK	0xffffff
+#define lpfc_mbox_sli_config_ecmn_hbd_len_WORD	buf_len
+	uint32_t pa_lo;
+	uint32_t pa_hi;
+};
+
+struct lpfc_sli_config_hdr {
+	uint32_t word1;
+#define lpfc_mbox_hdr_emb_SHIFT		0
+#define lpfc_mbox_hdr_emb_MASK		0x00000001
+#define lpfc_mbox_hdr_emb_WORD		word1
+#define lpfc_mbox_hdr_mse_cnt_SHIFT	3
+#define lpfc_mbox_hdr_mse_cnt_MASK	0x0000001f
+#define lpfc_mbox_hdr_mse_cnt_WORD	word1
+	uint32_t payload_length;
+	uint32_t tag_lo;
+	uint32_t tag_hi;
+	uint32_t reserved5;
+};
+
+struct lpfc_sli_config_generic {
+	struct lpfc_sli_config_hdr	sli_config_hdr;
+#define LPFC_MBX_SLI_CONFIG_MAX_MSE     19
+	struct lpfc_sli_config_mse	mse[LPFC_MBX_SLI_CONFIG_MAX_MSE];
+};
+
+struct lpfc_sli_config_subcmnd {
+	struct lpfc_sli_config_hdr	sli_config_hdr;
+	uint32_t word6;
+#define lpfc_subcmnd_opcode_SHIFT	0
+#define lpfc_subcmnd_opcode_MASK	0xff
+#define lpfc_subcmnd_opcode_WORD	word6
+#define lpfc_subcmnd_subsys_SHIFT	8
+#define lpfc_subcmnd_subsys_MASK	0xff
+#define lpfc_subcmnd_subsys_WORD	word6
+	uint32_t timeout;
+	uint32_t request_length;
+	uint32_t word9;
+#define lpfc_subcmnd_version_SHIFT	0
+#define lpfc_subcmnd_version_MASK	0xff
+#define lpfc_subcmnd_version_WORD	word9
+	uint32_t word10;
+#define lpfc_subcmnd_ask_rd_len_SHIFT	0
+#define lpfc_subcmnd_ask_rd_len_MASK	0xffffff
+#define lpfc_subcmnd_ask_rd_len_WORD	word10
+	uint32_t rd_offset;
+	uint32_t obj_name[26];
+	uint32_t hbd_count;
+#define LPFC_MBX_SLI_CONFIG_MAX_HBD	10
+	struct lpfc_sli_config_subcmd_hbd   hbd[LPFC_MBX_SLI_CONFIG_MAX_HBD];
+};
+
+struct lpfc_sli_config_mbox {
+	uint32_t word0;
+#define lpfc_mqe_status_SHIFT		16
+#define lpfc_mqe_status_MASK		0x0000FFFF
+#define lpfc_mqe_status_WORD		word0
+#define lpfc_mqe_command_SHIFT		8
+#define lpfc_mqe_command_MASK		0x000000FF
+#define lpfc_mqe_command_WORD		word0
+	union {
+		struct lpfc_sli_config_generic	sli_config_generic;
+		struct lpfc_sli_config_subcmnd	sli_config_subcmnd;
+	} un;
+};
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 3d967741c708..c93fca058603 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1119,172 +1119,14 @@ lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file)
 }
 
 /*
+ * ---------------------------------
  * iDiag debugfs file access methods
- */
-
-/*
- * iDiag PCI config space register access methods:
- *
- * The PCI config space register accessees of read, write, read-modify-write
- * for set bits, and read-modify-write for clear bits to SLI4 PCI functions
- * are provided. In the proper SLI4 PCI function's debugfs iDiag directory,
- *
- *      /sys/kernel/debug/lpfc/fn<#>/iDiag
- *
- * the access is through the debugfs entry pciCfg:
- *
- * 1. For PCI config space register read access, there are two read methods:
- *    A) read a single PCI config space register in the size of a byte
- *    (8 bits), a word (16 bits), or a dword (32 bits); or B) browse through
- *    the 4K extended PCI config space.
- *
- *    A) Read a single PCI config space register consists of two steps:
- *
- *    Step-1: Set up PCI config space register read command, the command
- *    syntax is,
- *
- *        echo 1 <where> <count> > pciCfg
- *
- *    where, 1 is the iDiag command for PCI config space read, <where> is the
- *    offset from the beginning of the device's PCI config space to read from,
- *    and <count> is the size of PCI config space register data to read back,
- *    it will be 1 for reading a byte (8 bits), 2 for reading a word (16 bits
- *    or 2 bytes), or 4 for reading a dword (32 bits or 4 bytes).
- *
- *    Setp-2: Perform the debugfs read operation to execute the idiag command
- *    set up in Step-1,
- *
- *        cat pciCfg
- *
- *    Examples:
- *    To read PCI device's vendor-id and device-id from PCI config space,
- *
- *        echo 1 0 4 > pciCfg
- *        cat pciCfg
- *
- *    To read PCI device's currnt command from config space,
- *
- *        echo 1 4 2 > pciCfg
- *        cat pciCfg
- *
- *    B) Browse through the entire 4K extended PCI config space also consists
- *    of two steps:
- *
- *    Step-1: Set up PCI config space register browsing command, the command
- *    syntax is,
- *
- *        echo 1 0 4096 > pciCfg
- *
- *    where, 1 is the iDiag command for PCI config space read, 0 must be used
- *    as the offset for PCI config space register browse, and 4096 must be
- *    used as the count for PCI config space register browse.
- *
- *    Step-2: Repeately issue the debugfs read operation to browse through
- *    the entire PCI config space registers:
- *
- *        cat pciCfg
- *        cat pciCfg
- *        cat pciCfg
- *        ...
- *
- *    When browsing to the end of the 4K PCI config space, the browse method
- *    shall wrap around to start reading from beginning again, and again...
- *
- * 2. For PCI config space register write access, it supports a single PCI
- *    config space register write in the size of a byte (8 bits), a word
- *    (16 bits), or a dword (32 bits). The command syntax is,
- *
- *        echo 2 <where> <count> <value> > pciCfg
- *
- *    where, 2 is the iDiag command for PCI config space write, <where> is
- *    the offset from the beginning of the device's PCI config space to write
- *    into, <count> is the size of data to write into the PCI config space,
- *    it will be 1 for writing a byte (8 bits), 2 for writing a word (16 bits
- *    or 2 bytes), or 4 for writing a dword (32 bits or 4 bytes), and <value>
- *    is the data to be written into the PCI config space register at the
- *    offset.
- *
- *    Examples:
- *    To disable PCI device's interrupt assertion,
- *
- *    1) Read in device's PCI config space register command field <cmd>:
- *
- *           echo 1 4 2 > pciCfg
- *           cat pciCfg
- *
- *    2) Set bit 10 (Interrupt Disable bit) in the <cmd>:
- *
- *           <cmd> = <cmd> | (1 < 10)
- *
- *    3) Write the modified command back:
- *
- *           echo 2 4 2 <cmd> > pciCfg
- *
- * 3. For PCI config space register set bits access, it supports a single PCI
- *    config space register set bits in the size of a byte (8 bits), a word
- *    (16 bits), or a dword (32 bits). The command syntax is,
- *
- *        echo 3 <where> <count> <bitmask> > pciCfg
- *
- *    where, 3 is the iDiag command for PCI config space set bits, <where> is
- *    the offset from the beginning of the device's PCI config space to set
- *    bits into, <count> is the size of the bitmask to set into the PCI config
- *    space, it will be 1 for setting a byte (8 bits), 2 for setting a word
- *    (16 bits or 2 bytes), or 4 for setting a dword (32 bits or 4 bytes), and
- *    <bitmask> is the bitmask, indicating the bits to be set into the PCI
- *    config space register at the offset. The logic performed to the content
- *    of the PCI config space register, regval, is,
- *
- *        regval |= <bitmask>
- *
- * 4. For PCI config space register clear bits access, it supports a single
- *    PCI config space register clear bits in the size of a byte (8 bits),
- *    a word (16 bits), or a dword (32 bits). The command syntax is,
- *
- *        echo 4 <where> <count> <bitmask> > pciCfg
- *
- *    where, 4 is the iDiag command for PCI config space clear bits, <where>
- *    is the offset from the beginning of the device's PCI config space to
- *    clear bits from, <count> is the size of the bitmask to set into the PCI
- *    config space, it will be 1 for setting a byte (8 bits), 2 for setting
- *    a word(16 bits or 2 bytes), or 4 for setting a dword (32 bits or 4
- *    bytes), and <bitmask> is the bitmask, indicating the bits to be cleared
- *    from the PCI config space register at the offset. the logic performed
- *    to the content of the PCI config space register, regval, is,
- *
- *        regval &= ~<bitmask>
- *
- * Note, for all single register read, write, set bits, or clear bits access,
- * the offset (<where>) must be aligned with the size of the data:
- *
- * For data size of byte (8 bits), the offset must be aligned to the byte
- * boundary; for data size of word (16 bits), the offset must be aligned
- * to the word boundary; while for data size of dword (32 bits), the offset
- * must be aligned to the dword boundary. Otherwise, the interface will
- * return the error:
+ * ---------------------------------
  *
- *     "-bash: echo: write error: Invalid argument".
+ * All access methods are through the proper SLI4 PCI function's debugfs
+ * iDiag directory:
  *
- * For example:
- *
- *     echo 1 2 4 > pciCfg
- *     -bash: echo: write error: Invalid argument
- *
- * Note also, all of the numbers in the command fields for all read, write,
- * set bits, and clear bits PCI config space register command fields can be
- * either decimal or hex.
- *
- * For example,
- *     echo 1 0 4096 > pciCfg
- *
- * will be the same as
- *     echo 1 0 0x1000 > pciCfg
- *
- * And,
- *     echo 2 155 1 10 > pciCfg
- *
- * will be
- *     echo 2 0x9b 1 0xa > pciCfg
+ *     /sys/kernel/debug/lpfc/fn<#>/iDiag
  */
 
 /**
@@ -1331,10 +1173,10 @@ static int lpfc_idiag_cmd_get(const char __user *buf, size_t nbytes,
 	for (i = 0; i < LPFC_IDIAG_CMD_DATA_SIZE; i++) {
 		step_str = strsep(&pbuf, "\t ");
 		if (!step_str)
-			return 0;
+			return i;
 		idiag_cmd->data[i] = simple_strtol(step_str, NULL, 0);
 	}
-	return 0;
+	return i;
 }
 
 /**
@@ -1403,7 +1245,7 @@ lpfc_idiag_release(struct inode *inode, struct file *file)
  * Description:
  * This routine frees the buffer that was allocated when the debugfs file
  * was opened. It also reset the fields in the idiag command struct in the
- * case the command is not continuous browsing of the data structure.
+ * case of command for write operation.
  *
  * Returns:
  * This function returns zero.
@@ -1413,18 +1255,20 @@ lpfc_idiag_cmd_release(struct inode *inode, struct file *file)
 {
 	struct lpfc_debug *debug = file->private_data;
 
-	/* Read PCI config register, if not read all, clear command fields */
-	if ((debug->op == LPFC_IDIAG_OP_RD) &&
-	    (idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_RD))
-		if ((idiag.cmd.data[1] == sizeof(uint8_t)) ||
-		    (idiag.cmd.data[1] == sizeof(uint16_t)) ||
-		    (idiag.cmd.data[1] == sizeof(uint32_t)))
+	if (debug->op == LPFC_IDIAG_OP_WR) {
+		switch (idiag.cmd.opcode) {
+		case LPFC_IDIAG_CMD_PCICFG_WR:
+		case LPFC_IDIAG_CMD_PCICFG_ST:
+		case LPFC_IDIAG_CMD_PCICFG_CL:
+		case LPFC_IDIAG_CMD_QUEACC_WR:
+		case LPFC_IDIAG_CMD_QUEACC_ST:
+		case LPFC_IDIAG_CMD_QUEACC_CL:
 			memset(&idiag, 0, sizeof(idiag));
-
-	/* Write PCI config register, clear command fields */
-	if ((debug->op == LPFC_IDIAG_OP_WR) &&
-	    (idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_WR))
-		memset(&idiag, 0, sizeof(idiag));
+			break;
+		default:
+			break;
+		}
+	}
 
 	/* Free the buffers to the file operation */
 	kfree(debug->buffer);
@@ -1504,7 +1348,7 @@ lpfc_idiag_pcicfg_read(struct file *file, char __user *buf, size_t nbytes,
 		len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len,
 				"%03x: %08x\n", where, u32val);
 		break;
-	case LPFC_PCI_CFG_SIZE: /* browse all */
+	case LPFC_PCI_CFG_BROWSE: /* browse all */
 		goto pcicfg_browse;
 		break;
 	default:
@@ -1586,16 +1430,21 @@ lpfc_idiag_pcicfg_write(struct file *file, const char __user *buf,
 	debug->op = LPFC_IDIAG_OP_WR;
 
 	rc = lpfc_idiag_cmd_get(buf, nbytes, &idiag.cmd);
-	if (rc)
+	if (rc < 0)
 		return rc;
 
 	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_RD) {
+		/* Sanity check on PCI config read command line arguments */
+		if (rc != LPFC_PCI_CFG_RD_CMD_ARG)
+			goto error_out;
 		/* Read command from PCI config space, set up command fields */
 		where = idiag.cmd.data[0];
 		count = idiag.cmd.data[1];
-		if (count == LPFC_PCI_CFG_SIZE) {
-			if (where != 0)
+		if (count == LPFC_PCI_CFG_BROWSE) {
+			if (where % sizeof(uint32_t))
 				goto error_out;
+			/* Starting offset to browse */
+			idiag.offset.last_rd = where;
 		} else if ((count != sizeof(uint8_t)) &&
 			   (count != sizeof(uint16_t)) &&
 			   (count != sizeof(uint32_t)))
@@ -1621,6 +1470,9 @@ lpfc_idiag_pcicfg_write(struct file *file, const char __user *buf,
 	} else if (idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_WR ||
 		   idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_ST ||
 		   idiag.cmd.opcode == LPFC_IDIAG_CMD_PCICFG_CL) {
+		/* Sanity check on PCI config write command line arguments */
+		if (rc != LPFC_PCI_CFG_WR_CMD_ARG)
+			goto error_out;
 		/* Write command to PCI config space, read-modify-write */
 		where = idiag.cmd.data[0];
 		count = idiag.cmd.data[1];
@@ -1753,10 +1605,12 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
 			"Slow-path EQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], EQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n\n",
+			"\tEQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n\n",
 			phba->sli4_hba.sp_eq->queue_id,
 			phba->sli4_hba.sp_eq->entry_count,
+			phba->sli4_hba.sp_eq->entry_size,
 			phba->sli4_hba.sp_eq->host_index,
 			phba->sli4_hba.sp_eq->hba_index);
 
@@ -1765,10 +1619,12 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 			"Fast-path EQ information:\n");
 	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++) {
 		len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-				"\tID [%02d], EQE-COUNT [%04d], "
-				"HOST-INDEX [%04x], PORT-INDEX [%04x]\n",
+				"\tEQID[%02d], "
+				"QE-COUNT[%04d], QE-SIZE[%04d], "
+				"HOST-INDEX[%04d], PORT-INDEX[%04d]\n",
 				phba->sli4_hba.fp_eq[fcp_qidx]->queue_id,
 				phba->sli4_hba.fp_eq[fcp_qidx]->entry_count,
+				phba->sli4_hba.fp_eq[fcp_qidx]->entry_size,
 				phba->sli4_hba.fp_eq[fcp_qidx]->host_index,
 				phba->sli4_hba.fp_eq[fcp_qidx]->hba_index);
 	}
@@ -1776,89 +1632,101 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 
 	/* Get mailbox complete queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Mailbox CQ information:\n");
+			"Slow-path MBX CQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\t\tAssociated EQ-ID [%02d]:\n",
+			"Associated EQID[%02d]:\n",
 			phba->sli4_hba.mbx_cq->assoc_qid);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], CQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n\n",
+			"\tCQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n\n",
 			phba->sli4_hba.mbx_cq->queue_id,
 			phba->sli4_hba.mbx_cq->entry_count,
+			phba->sli4_hba.mbx_cq->entry_size,
 			phba->sli4_hba.mbx_cq->host_index,
 			phba->sli4_hba.mbx_cq->hba_index);
 
 	/* Get slow-path complete queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Slow-path CQ information:\n");
+			"Slow-path ELS CQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\t\tAssociated EQ-ID [%02d]:\n",
+			"Associated EQID[%02d]:\n",
 			phba->sli4_hba.els_cq->assoc_qid);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], CQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n\n",
+			"\tCQID [%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n\n",
 			phba->sli4_hba.els_cq->queue_id,
 			phba->sli4_hba.els_cq->entry_count,
+			phba->sli4_hba.els_cq->entry_size,
 			phba->sli4_hba.els_cq->host_index,
 			phba->sli4_hba.els_cq->hba_index);
 
 	/* Get fast-path complete queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Fast-path CQ information:\n");
+			"Fast-path FCP CQ information:\n");
 	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++) {
 		len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-				"\t\tAssociated EQ-ID [%02d]:\n",
+				"Associated EQID[%02d]:\n",
 				phba->sli4_hba.fcp_cq[fcp_qidx]->assoc_qid);
 		len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-		"\tID [%02d], EQE-COUNT [%04d], "
-		"HOST-INDEX [%04x], PORT-INDEX [%04x]\n",
-		phba->sli4_hba.fcp_cq[fcp_qidx]->queue_id,
-		phba->sli4_hba.fcp_cq[fcp_qidx]->entry_count,
-		phba->sli4_hba.fcp_cq[fcp_qidx]->host_index,
-		phba->sli4_hba.fcp_cq[fcp_qidx]->hba_index);
+				"\tCQID[%02d], "
+				"QE-COUNT[%04d], QE-SIZE[%04d], "
+				"HOST-INDEX[%04d], PORT-INDEX[%04d]\n",
+				phba->sli4_hba.fcp_cq[fcp_qidx]->queue_id,
+				phba->sli4_hba.fcp_cq[fcp_qidx]->entry_count,
+				phba->sli4_hba.fcp_cq[fcp_qidx]->entry_size,
+				phba->sli4_hba.fcp_cq[fcp_qidx]->host_index,
+				phba->sli4_hba.fcp_cq[fcp_qidx]->hba_index);
 	}
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n");
 
 	/* Get mailbox queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Mailbox MQ information:\n");
+			"Slow-path MBX MQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\t\tAssociated CQ-ID [%02d]:\n",
+			"Associated CQID[%02d]:\n",
 			phba->sli4_hba.mbx_wq->assoc_qid);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], MQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n\n",
+			"\tWQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n\n",
 			phba->sli4_hba.mbx_wq->queue_id,
 			phba->sli4_hba.mbx_wq->entry_count,
+			phba->sli4_hba.mbx_wq->entry_size,
 			phba->sli4_hba.mbx_wq->host_index,
 			phba->sli4_hba.mbx_wq->hba_index);
 
 	/* Get slow-path work queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Slow-path WQ information:\n");
+			"Slow-path ELS WQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\t\tAssociated CQ-ID [%02d]:\n",
+			"Associated CQID[%02d]:\n",
 			phba->sli4_hba.els_wq->assoc_qid);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], WQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n\n",
+			"\tWQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n\n",
 			phba->sli4_hba.els_wq->queue_id,
 			phba->sli4_hba.els_wq->entry_count,
+			phba->sli4_hba.els_wq->entry_size,
 			phba->sli4_hba.els_wq->host_index,
 			phba->sli4_hba.els_wq->hba_index);
 
 	/* Get fast-path work queue information */
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"Fast-path WQ information:\n");
+			"Fast-path FCP WQ information:\n");
 	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_wq_count; fcp_qidx++) {
 		len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-				"\t\tAssociated CQ-ID [%02d]:\n",
+				"Associated CQID[%02d]:\n",
 				phba->sli4_hba.fcp_wq[fcp_qidx]->assoc_qid);
 		len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-				"\tID [%02d], WQE-COUNT [%04d], "
-				"HOST-INDEX [%04x], PORT-INDEX [%04x]\n",
+				"\tWQID[%02d], "
+				"QE-COUNT[%04d], WQE-SIZE[%04d], "
+				"HOST-INDEX[%04d], PORT-INDEX[%04d]\n",
 				phba->sli4_hba.fcp_wq[fcp_qidx]->queue_id,
 				phba->sli4_hba.fcp_wq[fcp_qidx]->entry_count,
+				phba->sli4_hba.fcp_wq[fcp_qidx]->entry_size,
 				phba->sli4_hba.fcp_wq[fcp_qidx]->host_index,
 				phba->sli4_hba.fcp_wq[fcp_qidx]->hba_index);
 	}
@@ -1868,26 +1736,597 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
 			"Slow-path RQ information:\n");
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\t\tAssociated CQ-ID [%02d]:\n",
+			"Associated CQID[%02d]:\n",
 			phba->sli4_hba.hdr_rq->assoc_qid);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], RHQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n",
+			"\tHQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n",
 			phba->sli4_hba.hdr_rq->queue_id,
 			phba->sli4_hba.hdr_rq->entry_count,
+			phba->sli4_hba.hdr_rq->entry_size,
 			phba->sli4_hba.hdr_rq->host_index,
 			phba->sli4_hba.hdr_rq->hba_index);
 	len += snprintf(pbuffer+len, LPFC_QUE_INFO_GET_BUF_SIZE-len,
-			"\tID [%02d], RDQE-COUNT [%04d], "
-			"HOST-INDEX [%04x], PORT-INDEX [%04x]\n",
+			"\tDQID[%02d], "
+			"QE-COUNT[%04d], QE-SIZE[%04d], "
+			"HOST-INDEX[%04d], PORT-INDEX[%04d]\n",
 			phba->sli4_hba.dat_rq->queue_id,
 			phba->sli4_hba.dat_rq->entry_count,
+			phba->sli4_hba.dat_rq->entry_size,
 			phba->sli4_hba.dat_rq->host_index,
 			phba->sli4_hba.dat_rq->hba_index);
 
 	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
 }
 
+/**
+ * lpfc_idiag_que_param_check - queue access command parameter sanity check
+ * @q: The pointer to queue structure.
+ * @index: The index into a queue entry.
+ * @count: The number of queue entries to access.
+ *
+ * Description:
+ * The routine performs sanity check on device queue access method commands.
+ *
+ * Returns:
+ * This function returns -EINVAL when fails the sanity check, otherwise, it
+ * returns 0.
+ **/
+static int
+lpfc_idiag_que_param_check(struct lpfc_queue *q, int index, int count)
+{
+	/* Only support single entry read or browsing */
+	if ((count != 1) && (count != LPFC_QUE_ACC_BROWSE))
+		return -EINVAL;
+	if (index > q->entry_count - 1)
+		return -EINVAL;
+	return 0;
+}
+
+/**
+ * lpfc_idiag_queacc_read_qe - read a single entry from the given queue index
+ * @pbuffer: The pointer to buffer to copy the read data into.
+ * @pque: The pointer to the queue to be read.
+ * @index: The index into the queue entry.
+ *
+ * Description:
+ * This routine reads out a single entry from the given queue's index location
+ * and copies it into the buffer provided.
+ *
+ * Returns:
+ * This function returns 0 when it fails, otherwise, it returns the length of
+ * the data read into the buffer provided.
+ **/
+static int
+lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct lpfc_queue *pque,
+			  uint32_t index)
+{
+	int offset, esize;
+	uint32_t *pentry;
+
+	if (!pbuffer || !pque)
+		return 0;
+
+	esize = pque->entry_size;
+	len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
+			"QE-INDEX[%04d]:\n", index);
+
+	offset = 0;
+	pentry = pque->qe[index].address;
+	while (esize > 0) {
+		len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
+				"%08x ", *pentry);
+		pentry++;
+		offset += sizeof(uint32_t);
+		esize -= sizeof(uint32_t);
+		if (esize > 0 && !(offset % (4 * sizeof(uint32_t))))
+			len += snprintf(pbuffer+len,
+					LPFC_QUE_ACC_BUF_SIZE-len, "\n");
+	}
+	len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "\n");
+
+	return len;
+}
+
+/**
+ * lpfc_idiag_queacc_read - idiag debugfs read port queue
+ * @file: The file pointer to read from.
+ * @buf: The buffer to copy the data to.
+ * @nbytes: The number of bytes to read.
+ * @ppos: The position in the file to start reading from.
+ *
+ * Description:
+ * This routine reads data from the @phba device queue memory according to the
+ * idiag command, and copies to user @buf. Depending on the queue dump read
+ * command setup, it does either a single queue entry read or browsing through
+ * all entries of the queue.
+ *
+ * Returns:
+ * This function returns the amount of data that was read (this could be less
+ * than @nbytes if the end of the file was reached) or a negative error value.
+ **/
+static ssize_t
+lpfc_idiag_queacc_read(struct file *file, char __user *buf, size_t nbytes,
+		       loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	uint32_t last_index, index, count;
+	struct lpfc_queue *pque = NULL;
+	char *pbuffer;
+	int len = 0;
+
+	/* This is a user read operation */
+	debug->op = LPFC_IDIAG_OP_RD;
+
+	if (!debug->buffer)
+		debug->buffer = kmalloc(LPFC_QUE_ACC_BUF_SIZE, GFP_KERNEL);
+	if (!debug->buffer)
+		return 0;
+	pbuffer = debug->buffer;
+
+	if (*ppos)
+		return 0;
+
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_RD) {
+		index = idiag.cmd.data[2];
+		count = idiag.cmd.data[3];
+		pque = (struct lpfc_queue *)idiag.ptr_private;
+	} else
+		return 0;
+
+	/* Browse the queue starting from index */
+	if (count == LPFC_QUE_ACC_BROWSE)
+		goto que_browse;
+
+	/* Read a single entry from the queue */
+	len = lpfc_idiag_queacc_read_qe(pbuffer, len, pque, index);
+
+	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
+
+que_browse:
+
+	/* Browse all entries from the queue */
+	last_index = idiag.offset.last_rd;
+	index = last_index;
+
+	while (len < LPFC_QUE_ACC_SIZE - pque->entry_size) {
+		len = lpfc_idiag_queacc_read_qe(pbuffer, len, pque, index);
+		index++;
+		if (index > pque->entry_count - 1)
+			break;
+	}
+
+	/* Set up the starting index for the next portion of queue read */
+	if (index > pque->entry_count - 1)
+		index = 0;
+	idiag.offset.last_rd = index;
+
+	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
+}
+
+/**
+ * lpfc_idiag_queacc_write - Syntax check and set up idiag queacc commands
+ * @file: The file pointer to read from.
+ * @buf: The buffer to copy the user data from.
+ * @nbytes: The number of bytes to get.
+ * @ppos: The position in the file to start reading from.
+ *
+ * This routine gets the debugfs idiag command struct from user space and then
+ * performs the syntax check for port queue read (dump) or write (set) command
+ * accordingly. In the case of port queue read command, it sets up the command
+ * in the idiag command struct for the following debugfs read operation. In
+ * the case of port queue write operation, it executes the write operation
+ * into the port queue entry accordingly.
+ *
+ * It returns the @nbytes passing in from debugfs user space when successful.
+ * A negative lpfc_idiag_cmd_get() result is returned as is; any other failed
+ * check clears the idiag command state and returns -EINVAL.
+ **/
+static ssize_t
+lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
+			size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	uint32_t qidx, quetp, queid, index, count, offset, value;
+	uint32_t *pentry;
+	struct lpfc_queue *pque;
+	int rc;
+
+	/* This is a user write operation */
+	debug->op = LPFC_IDIAG_OP_WR;
+
+	rc = lpfc_idiag_cmd_get(buf, nbytes, &idiag.cmd);
+	if (rc < 0)
+		return rc;
+
+	/* Get and sanity check on command fields */
+	quetp  = idiag.cmd.data[0];
+	queid  = idiag.cmd.data[1];
+	index  = idiag.cmd.data[2];
+	count  = idiag.cmd.data[3];
+	offset = idiag.cmd.data[4];
+	value  = idiag.cmd.data[5];
+
+	/* Sanity check on command line arguments */
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_ST ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_CL) {
+		if (rc != LPFC_QUE_ACC_WR_CMD_ARG)
+			goto error_out;
+		if (count != 1)
+			goto error_out;
+	} else if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_RD) {
+		if (rc != LPFC_QUE_ACC_RD_CMD_ARG)
+			goto error_out;
+	} else
+		goto error_out;
+
+	switch (quetp) {
+	case LPFC_IDIAG_EQ:
+		/* Slow-path event queue */
+		if (phba->sli4_hba.sp_eq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.sp_eq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.sp_eq;
+			goto pass_check;
+		}
+		/* Fast-path event queue */
+		for (qidx = 0; qidx < phba->cfg_fcp_eq_count; qidx++) {
+			if (phba->sli4_hba.fp_eq[qidx]->queue_id == queid) {
+				/* Sanity check */
+				rc = lpfc_idiag_que_param_check(
+						phba->sli4_hba.fp_eq[qidx],
+						index, count);
+				if (rc)
+					goto error_out;
+				idiag.ptr_private = phba->sli4_hba.fp_eq[qidx];
+				goto pass_check;
+			}
+		}
+		goto error_out;
+		break;
+	case LPFC_IDIAG_CQ:
+		/* MBX complete queue */
+		if (phba->sli4_hba.mbx_cq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.mbx_cq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.mbx_cq;
+			goto pass_check;
+		}
+		/* ELS complete queue */
+		if (phba->sli4_hba.els_cq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.els_cq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.els_cq;
+			goto pass_check;
+		}
+		/* FCP complete queue */
+		for (qidx = 0; qidx < phba->cfg_fcp_eq_count; qidx++) {
+			if (phba->sli4_hba.fcp_cq[qidx]->queue_id == queid) {
+				/* Sanity check */
+				rc = lpfc_idiag_que_param_check(
+						phba->sli4_hba.fcp_cq[qidx],
+						index, count);
+				if (rc)
+					goto error_out;
+				idiag.ptr_private =
+						phba->sli4_hba.fcp_cq[qidx];
+				goto pass_check;
+			}
+		}
+		goto error_out;
+		break;
+	case LPFC_IDIAG_MQ:
+		/* MBX work queue is the only MQ; any other queid is an error */
+		if (phba->sli4_hba.mbx_wq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.mbx_wq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.mbx_wq;
+			goto pass_check;
+		}
+		goto error_out;
+	case LPFC_IDIAG_WQ:
+		/* ELS work queue */
+		if (phba->sli4_hba.els_wq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.els_wq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.els_wq;
+			goto pass_check;
+		}
+		/* FCP work queue */
+		for (qidx = 0; qidx < phba->cfg_fcp_wq_count; qidx++) {
+			if (phba->sli4_hba.fcp_wq[qidx]->queue_id == queid) {
+				/* Sanity check */
+				rc = lpfc_idiag_que_param_check(
+						phba->sli4_hba.fcp_wq[qidx],
+						index, count);
+				if (rc)
+					goto error_out;
+				idiag.ptr_private =
+					phba->sli4_hba.fcp_wq[qidx];
+				goto pass_check;
+			}
+		}
+		goto error_out;
+		break;
+	case LPFC_IDIAG_RQ:
+		/* HDR queue */
+		if (phba->sli4_hba.hdr_rq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.hdr_rq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.hdr_rq;
+			goto pass_check;
+		}
+		/* DAT queue */
+		if (phba->sli4_hba.dat_rq->queue_id == queid) {
+			/* Sanity check */
+			rc = lpfc_idiag_que_param_check(
+					phba->sli4_hba.dat_rq, index, count);
+			if (rc)
+				goto error_out;
+			idiag.ptr_private = phba->sli4_hba.dat_rq;
+			goto pass_check;
+		}
+		goto error_out;
+		break;
+	default:
+		goto error_out;
+		break;
+	}
+
+pass_check:
+
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_RD) {
+		if (count == LPFC_QUE_ACC_BROWSE)
+			idiag.offset.last_rd = index;
+	}
+
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_ST ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_CL) {
+		/* Additional sanity checks on write operation */
+		pque = (struct lpfc_queue *)idiag.ptr_private;
+		if (offset > pque->entry_size/sizeof(uint32_t) - 1)
+			goto error_out;
+		pentry = pque->qe[index].address;
+		pentry += offset;
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR)
+			*pentry = value;
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_ST)
+			*pentry |= value;
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_CL)
+			*pentry &= ~value;
+	}
+	return nbytes;
+
+error_out:
+	/* Clean out command structure on command error out */
+	memset(&idiag, 0, sizeof(idiag));
+	return -EINVAL;
+}
+
+/**
+ * lpfc_idiag_drbacc_read_reg - idiag debugfs read a doorbell register
+ * @phba: The pointer to hba structure.
+ * @pbuffer: The pointer to the buffer to copy the data to.
+ * @len: The length of bytes already copied into @pbuffer.
+ * @drbregid: The id to doorbell registers.
+ *
+ * Description:
+ * This routine reads a doorbell register and copies its content to the
+ * user buffer pointed to by @pbuffer.
+ *
+ * Returns:
+ * This function returns the amount of data that was copied into @pbuffer.
+ **/
+static int
+lpfc_idiag_drbacc_read_reg(struct lpfc_hba *phba, char *pbuffer,
+			   int len, uint32_t drbregid)
+{
+
+	if (!pbuffer)
+		return 0;
+
+	switch (drbregid) {
+	case LPFC_DRB_EQCQ:
+		len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len,
+				"EQCQ-DRB-REG: 0x%08x\n",
+				readl(phba->sli4_hba.EQCQDBregaddr));
+		break;
+	case LPFC_DRB_MQ:
+		len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len,
+				"MQ-DRB-REG:   0x%08x\n",
+				readl(phba->sli4_hba.MQDBregaddr));
+		break;
+	case LPFC_DRB_WQ:
+		len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len,
+				"WQ-DRB-REG:   0x%08x\n",
+				readl(phba->sli4_hba.WQDBregaddr));
+		break;
+	case LPFC_DRB_RQ:
+		len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len,
+				"RQ-DRB-REG:   0x%08x\n",
+				readl(phba->sli4_hba.RQDBregaddr));
+		break;
+	default:
+		break;
+	}
+
+	return len;
+}
+
+/**
+ * lpfc_idiag_drbacc_read - idiag debugfs read port doorbell
+ * @file: The file pointer to read from.
+ * @buf: The buffer to copy the data to.
+ * @nbytes: The number of bytes to read.
+ * @ppos: The position in the file to start reading from.
+ *
+ * Description:
+ * This routine reads data from the @phba device doorbell register according
+ * to the idiag command, and copies to user @buf. Depending on the doorbell
+ * register read command setup, it does either a single doorbell register
+ * read or dump all doorbell registers.
+ *
+ * Returns:
+ * This function returns the amount of data that was read (this could be less
+ * than @nbytes if the end of the file was reached) or a negative error value.
+ **/
+static ssize_t
+lpfc_idiag_drbacc_read(struct file *file, char __user *buf, size_t nbytes,
+		       loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	uint32_t drb_reg_id, i;
+	char *pbuffer;
+	int len = 0;
+
+	/* This is a user read operation */
+	debug->op = LPFC_IDIAG_OP_RD;
+
+	if (!debug->buffer)
+		debug->buffer = kmalloc(LPFC_DRB_ACC_BUF_SIZE, GFP_KERNEL);
+	if (!debug->buffer)
+		return 0;
+	pbuffer = debug->buffer;
+
+	if (*ppos)
+		return 0;
+
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_RD)
+		drb_reg_id = idiag.cmd.data[0];
+	else
+		return 0;
+
+	if (drb_reg_id == LPFC_DRB_ACC_ALL)
+		for (i = 1; i <= LPFC_DRB_MAX; i++)
+			len = lpfc_idiag_drbacc_read_reg(phba,
+							 pbuffer, len, i);
+	else
+		len = lpfc_idiag_drbacc_read_reg(phba,
+						 pbuffer, len, drb_reg_id);
+
+	return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len);
+}
+
+/**
+ * lpfc_idiag_drbacc_write - Syntax check and set up idiag drbacc commands
+ * @file: The file pointer to read from.
+ * @buf: The buffer to copy the user data from.
+ * @nbytes: The number of bytes to get.
+ * @ppos: The position in the file to start reading from.
+ *
+ * This routine gets the debugfs idiag command struct from user space and then
+ * performs the syntax check for port doorbell register read (dump) or write
+ * (set) command accordingly. In the case of a doorbell register read command,
+ * it sets up the command in the idiag command struct for the following debugfs
+ * read operation. In the case of port doorbell register write operation, it
+ * executes the write operation into the port doorbell register accordingly.
+ *
+ * It returns the @nbytes passing in from debugfs user space when successful.
+ * In case of error conditions, it returns proper error code back to the user
+ * space.
+ **/
+static ssize_t
+lpfc_idiag_drbacc_write(struct file *file, const char __user *buf,
+			size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+	uint32_t drb_reg_id, value, reg_val;
+	void __iomem *drb_reg;
+	int rc;
+
+	/* This is a user write operation */
+	debug->op = LPFC_IDIAG_OP_WR;
+
+	rc = lpfc_idiag_cmd_get(buf, nbytes, &idiag.cmd);
+	if (rc < 0)
+		return rc;
+
+	/* Sanity check on command line arguments */
+	drb_reg_id = idiag.cmd.data[0];
+	value = idiag.cmd.data[1];
+
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_WR ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_ST ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_CL) {
+		if (rc != LPFC_DRB_ACC_WR_CMD_ARG)
+			goto error_out;
+		if (drb_reg_id > LPFC_DRB_MAX)
+			goto error_out;
+	} else if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_RD) {
+		if (rc != LPFC_DRB_ACC_RD_CMD_ARG)
+			goto error_out;
+		if ((drb_reg_id > LPFC_DRB_MAX) &&
+		    (drb_reg_id != LPFC_DRB_ACC_ALL))
+			goto error_out;
+	} else
+		goto error_out;
+
+	/* Perform the write access operation */
+	if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_WR ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_ST ||
+	    idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_CL) {
+		switch (drb_reg_id) {
+		case LPFC_DRB_EQCQ:
+			drb_reg = phba->sli4_hba.EQCQDBregaddr;
+			break;
+		case LPFC_DRB_MQ:
+			drb_reg = phba->sli4_hba.MQDBregaddr;
+			break;
+		case LPFC_DRB_WQ:
+			drb_reg = phba->sli4_hba.WQDBregaddr;
+			break;
+		case LPFC_DRB_RQ:
+			drb_reg = phba->sli4_hba.RQDBregaddr;
+			break;
+		default:
+			goto error_out;
+		}
+
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_WR)
+			reg_val = value;
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_ST) {
+			reg_val = readl(drb_reg);
+			reg_val |= value;
+		}
+		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_DRBACC_CL) {
+			reg_val = readl(drb_reg);
+			reg_val &= ~value;
+		}
+		writel(reg_val, drb_reg);
+		readl(drb_reg); /* flush */
+	}
+	return nbytes;
+
+error_out:
+	/* Clean out command structure on command error out */
+	memset(&idiag, 0, sizeof(idiag));
+	return -EINVAL;
+}
+
 #undef lpfc_debugfs_op_disc_trc
 static const struct file_operations lpfc_debugfs_op_disc_trc = {
 	.owner =        THIS_MODULE,
@@ -1986,6 +2425,26 @@ static const struct file_operations lpfc_idiag_op_queInfo = {
 	.release =      lpfc_idiag_release,
 };
 
+#undef lpfc_idiag_op_queacc
+static const struct file_operations lpfc_idiag_op_queAcc = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_idiag_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_idiag_queacc_read,
+	.write =        lpfc_idiag_queacc_write,
+	.release =      lpfc_idiag_cmd_release,
+};
+
+#undef lpfc_idiag_op_drbacc
+static const struct file_operations lpfc_idiag_op_drbAcc = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_idiag_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_idiag_drbacc_read,
+	.write =        lpfc_idiag_drbacc_write,
+	.release =      lpfc_idiag_cmd_release,
+};
+
 #endif
 
 /**
@@ -2261,6 +2720,32 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 		}
 	}
 
+	/* iDiag access PCI function queue */
+	snprintf(name, sizeof(name), "queAcc");
+	if (!phba->idiag_que_acc) {
+		phba->idiag_que_acc =
+			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
+				phba->idiag_root, phba, &lpfc_idiag_op_queAcc);
+		if (!phba->idiag_que_acc) {
+			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+					 "2926 Can't create idiag debugfs\n");
+			goto debug_failed;
+		}
+	}
+
+	/* iDiag access PCI function doorbell registers */
+	snprintf(name, sizeof(name), "drbAcc");
+	if (!phba->idiag_drb_acc) {
+		phba->idiag_drb_acc =
+			debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
+				phba->idiag_root, phba, &lpfc_idiag_op_drbAcc);
+		if (!phba->idiag_drb_acc) {
+			lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+					 "2927 Can't create idiag debugfs\n");
+			goto debug_failed;
+		}
+	}
+
 debug_failed:
 	return;
 #endif
@@ -2339,6 +2824,16 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 		 * iDiag release
 		 */
 		if (phba->sli_rev == LPFC_SLI_REV4) {
+			if (phba->idiag_drb_acc) {
+				/* iDiag drbAcc */
+				debugfs_remove(phba->idiag_drb_acc);
+				phba->idiag_drb_acc = NULL;
+			}
+			if (phba->idiag_que_acc) {
+				/* iDiag queAcc */
+				debugfs_remove(phba->idiag_que_acc);
+				phba->idiag_que_acc = NULL;
+			}
 			if (phba->idiag_que_info) {
 				/* iDiag queInfo */
 				debugfs_remove(phba->idiag_que_info);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 91b9a9427cda..6525a5e62d27 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -39,13 +39,42 @@
 /* hbqinfo output buffer size */
 #define LPFC_HBQINFO_SIZE 8192
 
-/* rdPciConf output buffer size */
+/* pciConf */
+#define LPFC_PCI_CFG_BROWSE 0xffff
+#define LPFC_PCI_CFG_RD_CMD_ARG 2
+#define LPFC_PCI_CFG_WR_CMD_ARG 3
 #define LPFC_PCI_CFG_SIZE 4096
 #define LPFC_PCI_CFG_RD_BUF_SIZE (LPFC_PCI_CFG_SIZE/2)
 #define LPFC_PCI_CFG_RD_SIZE (LPFC_PCI_CFG_SIZE/4)
 
-/* queue info output buffer size */
-#define LPFC_QUE_INFO_GET_BUF_SIZE 2048
+/* queue info */
+#define LPFC_QUE_INFO_GET_BUF_SIZE 4096
+
+/* queue acc */
+#define LPFC_QUE_ACC_BROWSE 0xffff
+#define LPFC_QUE_ACC_RD_CMD_ARG 4
+#define LPFC_QUE_ACC_WR_CMD_ARG 6
+#define LPFC_QUE_ACC_BUF_SIZE 4096
+#define LPFC_QUE_ACC_SIZE (LPFC_QUE_ACC_BUF_SIZE/2)
+
+#define LPFC_IDIAG_EQ 1
+#define LPFC_IDIAG_CQ 2
+#define LPFC_IDIAG_MQ 3
+#define LPFC_IDIAG_WQ 4
+#define LPFC_IDIAG_RQ 5
+
+/* doorbell acc */
+#define LPFC_DRB_ACC_ALL 0xffff
+#define LPFC_DRB_ACC_RD_CMD_ARG 1
+#define LPFC_DRB_ACC_WR_CMD_ARG 2
+#define LPFC_DRB_ACC_BUF_SIZE 256
+
+#define LPFC_DRB_EQCQ 1
+#define LPFC_DRB_MQ   2
+#define LPFC_DRB_WQ   3
+#define LPFC_DRB_RQ   4
+
+#define LPFC_DRB_MAX  4
 
 #define SIZE_U8  sizeof(uint8_t)
 #define SIZE_U16 sizeof(uint16_t)
@@ -73,13 +102,23 @@ struct lpfc_idiag_offset {
 	uint32_t last_rd;
 };
 
-#define LPFC_IDIAG_CMD_DATA_SIZE 4
+#define LPFC_IDIAG_CMD_DATA_SIZE 8
 struct lpfc_idiag_cmd {
 	uint32_t opcode;
 #define LPFC_IDIAG_CMD_PCICFG_RD 0x00000001
 #define LPFC_IDIAG_CMD_PCICFG_WR 0x00000002
 #define LPFC_IDIAG_CMD_PCICFG_ST 0x00000003
 #define LPFC_IDIAG_CMD_PCICFG_CL 0x00000004
+
+#define LPFC_IDIAG_CMD_QUEACC_RD 0x00000011
+#define LPFC_IDIAG_CMD_QUEACC_WR 0x00000012
+#define LPFC_IDIAG_CMD_QUEACC_ST 0x00000013
+#define LPFC_IDIAG_CMD_QUEACC_CL 0x00000014
+
+#define LPFC_IDIAG_CMD_DRBACC_RD 0x00000021
+#define LPFC_IDIAG_CMD_DRBACC_WR 0x00000022
+#define LPFC_IDIAG_CMD_DRBACC_ST 0x00000023
+#define LPFC_IDIAG_CMD_DRBACC_CL 0x00000024
 	uint32_t data[LPFC_IDIAG_CMD_DATA_SIZE];
 };
 
@@ -87,6 +126,7 @@ struct lpfc_idiag {
 	uint32_t active;
 	struct lpfc_idiag_cmd cmd;
 	struct lpfc_idiag_offset offset;
+	void *ptr_private;
 };
 #endif
 
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index d34b69f9cdb1..e2c452467c8b 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -670,6 +670,7 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 			 * Driver needs to re-reg VPI in order for f/w
 			 * to update the MAC address.
 			 */
+			lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 			lpfc_register_new_vport(phba, vport, ndlp);
 			return 0;
 	}
@@ -869,8 +870,8 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		 */
 		if ((phba->hba_flag & HBA_FIP_SUPPORT) &&
 		    (phba->fcf.fcf_flag & FCF_DISCOVERY) &&
-		    (irsp->ulpStatus != IOSTAT_LOCAL_REJECT) &&
-		    (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED)) {
+		    !((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+		     (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) {
 			lpfc_printf_log(phba, KERN_WARNING, LOG_FIP | LOG_ELS,
 					"2611 FLOGI failed on FCF (x%x), "
 					"status:x%x/x%x, tmo:x%x, perform "
@@ -1085,14 +1086,15 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	if (sp->cmn.fcphHigh < FC_PH3)
 		sp->cmn.fcphHigh = FC_PH3;
 
-	if  ((phba->sli_rev == LPFC_SLI_REV4) &&
-	     (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
-	      LPFC_SLI_INTF_IF_TYPE_0)) {
-		elsiocb->iocb.ulpCt_h = ((SLI4_CT_FCFI >> 1) & 1);
-		elsiocb->iocb.ulpCt_l = (SLI4_CT_FCFI & 1);
-		/* FLOGI needs to be 3 for WQE FCFI */
-		/* Set the fcfi to the fcfi we registered with */
-		elsiocb->iocb.ulpContext = phba->fcf.fcfi;
+	if  (phba->sli_rev == LPFC_SLI_REV4) {
+		if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
+		    LPFC_SLI_INTF_IF_TYPE_0) {
+			elsiocb->iocb.ulpCt_h = ((SLI4_CT_FCFI >> 1) & 1);
+			elsiocb->iocb.ulpCt_l = (SLI4_CT_FCFI & 1);
+			/* FLOGI needs to be 3 for WQE FCFI */
+			/* Set the fcfi to the fcfi we registered with */
+			elsiocb->iocb.ulpContext = phba->fcf.fcfi;
+		}
 	} else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
 		sp->cmn.request_multiple_Nport = 1;
 		/* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */
@@ -4107,13 +4109,13 @@ lpfc_els_clear_rrq(struct lpfc_vport *vport,
 	pcmd += sizeof(uint32_t);
 	rrq = (struct RRQ *)pcmd;
 	rrq->rrq_exchg = be32_to_cpu(rrq->rrq_exchg);
-	rxid = be16_to_cpu(bf_get(rrq_rxid, rrq));
+	rxid = bf_get(rrq_rxid, rrq);
 
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
 			"2883 Clear RRQ for SID:x%x OXID:x%x RXID:x%x"
 			" x%x x%x\n",
 			be32_to_cpu(bf_get(rrq_did, rrq)),
-			be16_to_cpu(bf_get(rrq_oxid, rrq)),
+			bf_get(rrq_oxid, rrq),
 			rxid,
 			iocb->iotag, iocb->iocb.ulpContext);
 
@@ -4121,7 +4123,7 @@ lpfc_els_clear_rrq(struct lpfc_vport *vport,
 		"Clear RRQ:  did:x%x flg:x%x exchg:x%.08x",
 		ndlp->nlp_DID, ndlp->nlp_flag, rrq->rrq_exchg);
 	if (vport->fc_myDID == be32_to_cpu(bf_get(rrq_did, rrq)))
-		xri = be16_to_cpu(bf_get(rrq_oxid, rrq));
+		xri = bf_get(rrq_oxid, rrq);
 	else
 		xri = rxid;
 	prrq = lpfc_get_active_rrq(vport, xri, ndlp->nlp_DID);
@@ -7290,8 +7292,9 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	struct lpfc_vport *vport = cmdiocb->vport;
 	IOCB_t *irsp;
 	struct lpfc_nodelist *ndlp;
-	ndlp = (struct lpfc_nodelist *)cmdiocb->context1;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
+	ndlp = (struct lpfc_nodelist *)cmdiocb->context1;
 	irsp = &rspiocb->iocb;
 	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
 		"LOGO npiv cmpl:  status:x%x/x%x did:x%x",
@@ -7302,6 +7305,19 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 	/* Trigger the release of the ndlp after logo */
 	lpfc_nlp_put(ndlp);
+
+	/* NPIV LOGO completes to NPort <nlp_DID> */
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+			 "2928 NPIV LOGO completes to NPort x%x "
+			 "Data: x%x x%x x%x x%x\n",
+			 ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4],
+			 irsp->ulpTimeout, vport->num_disc_nodes);
+
+	if (irsp->ulpStatus == IOSTAT_SUCCESS) {
+		spin_lock_irq(shost->host_lock);
+		vport->fc_flag &= ~FC_FABRIC;
+		spin_unlock_irq(shost->host_lock);
+	}
 }
 
 /**
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 301498301a8f..7a35df5e2038 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -3569,6 +3569,10 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		"rport add:       did:x%x flg:x%x type x%x",
 		ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
 
+	/* Don't add the remote port if unloading. */
+	if (vport->load_flag & FC_UNLOADING)
+		return;
+
 	ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids);
 	if (!rport || !get_device(&rport->dev)) {
 		dev_printk(KERN_WARNING, &phba->pcidev->dev,
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 8433ac0d9fb4..4dff668ebdad 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1059,6 +1059,11 @@ struct rq_context {
 #define lpfc_rq_context_rqe_size_SHIFT	8		/* Version 1 Only */
 #define lpfc_rq_context_rqe_size_MASK	0x0000000F
 #define lpfc_rq_context_rqe_size_WORD	word0
+#define LPFC_RQE_SIZE_8		2
+#define LPFC_RQE_SIZE_16	3
+#define LPFC_RQE_SIZE_32	4
+#define LPFC_RQE_SIZE_64	5
+#define LPFC_RQE_SIZE_128	6
 #define lpfc_rq_context_page_size_SHIFT	0		/* Version 1 Only */
 #define lpfc_rq_context_page_size_MASK	0x000000FF
 #define lpfc_rq_context_page_size_WORD	word0
@@ -2108,6 +2113,8 @@ struct lpfc_mbx_pc_sli4_params {
 #define sgl_pp_align_WORD			word12
 	uint32_t rsvd_13_63[51];
 };
+#define SLI4_PAGE_ALIGN(addr) (((addr)+((SLI4_PAGE_SIZE)-1)) \
+			       &(~((SLI4_PAGE_SIZE)-1)))
 
 struct lpfc_sli4_parameters {
 	uint32_t word0;
@@ -2491,6 +2498,9 @@ struct wqe_common {
 #define wqe_reqtag_SHIFT      0
 #define wqe_reqtag_MASK       0x0000FFFF
 #define wqe_reqtag_WORD       word9
+#define wqe_temp_rpi_SHIFT    16
+#define wqe_temp_rpi_MASK     0x0000FFFF
+#define wqe_temp_rpi_WORD     word9
 #define wqe_rcvoxid_SHIFT     16
 #define wqe_rcvoxid_MASK      0x0000FFFF
 #define wqe_rcvoxid_WORD      word9
@@ -2524,7 +2534,7 @@ struct wqe_common {
 #define wqe_wqes_WORD         word10
 /* Note that this field overlaps above fields */
 #define wqe_wqid_SHIFT        1
-#define wqe_wqid_MASK         0x0000007f
+#define wqe_wqid_MASK         0x00007fff
 #define wqe_wqid_WORD         word10
 #define wqe_pri_SHIFT         16
 #define wqe_pri_MASK          0x00000007
@@ -2621,7 +2631,11 @@ struct xmit_els_rsp64_wqe {
 	uint32_t rsvd4;
 	struct wqe_did wqe_dest;
 	struct wqe_common wqe_com; /* words 6-11 */
-	uint32_t rsvd_12_15[4];
+	uint32_t word12;
+#define wqe_rsp_temp_rpi_SHIFT    0
+#define wqe_rsp_temp_rpi_MASK     0x0000FFFF
+#define wqe_rsp_temp_rpi_WORD     word12
+	uint32_t rsvd_13_15[3];
 };
 
 struct xmit_bls_rsp64_wqe {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 505f88443b5c..7dda036a1af3 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3209,9 +3209,9 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
 	phba->sli4_hba.link_state.logical_speed =
 			bf_get(lpfc_acqe_logical_link_speed, acqe_link);
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"2900 Async FCoE Link event - Speed:%dGBit duplex:x%x "
-			"LA Type:x%x Port Type:%d Port Number:%d Logical "
-			"speed:%dMbps Fault:%d\n",
+			"2900 Async FC/FCoE Link event - Speed:%dGBit "
+			"duplex:x%x LA Type:x%x Port Type:%d Port Number:%d "
+			"Logical speed:%dMbps Fault:%d\n",
 			phba->sli4_hba.link_state.speed,
 			phba->sli4_hba.link_state.topology,
 			phba->sli4_hba.link_state.status,
@@ -4906,6 +4906,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	uint16_t rpi_limit, curr_rpi_range;
 	struct lpfc_dmabuf *dmabuf;
 	struct lpfc_rpi_hdr *rpi_hdr;
+	uint32_t rpi_count;
 
 	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
 		    phba->sli4_hba.max_cfg_param.max_rpi - 1;
@@ -4920,7 +4921,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	 * and to allow the full max_rpi range per port.
 	 */
 	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
-		return NULL;
+		rpi_count = rpi_limit - curr_rpi_range;
+	else
+		rpi_count = LPFC_RPI_HDR_COUNT;
 
 	/*
 	 * First allocate the protocol header region for the port.  The
@@ -4961,7 +4964,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	 * The next_rpi stores the next module-64 rpi value to post
 	 * in any subsequent rpi memory region postings.
 	 */
-	phba->sli4_hba.next_rpi += LPFC_RPI_HDR_COUNT;
+	phba->sli4_hba.next_rpi += rpi_count;
 	spin_unlock_irq(&phba->hbalock);
 	return rpi_hdr;
 
@@ -7004,7 +7007,8 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 		lpfc_sli4_bar0_register_memmap(phba, if_type);
 	}
 
-	if (pci_resource_start(pdev, 2)) {
+	if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) &&
+	    (pci_resource_start(pdev, 2))) {
 		/*
 		 * Map SLI4 if type 0 HBA Control Register base to a kernel
 		 * virtual address and setup the registers.
@@ -7021,7 +7025,8 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
 		lpfc_sli4_bar1_register_memmap(phba);
 	}
 
-	if (pci_resource_start(pdev, 4)) {
+	if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) &&
+	    (pci_resource_start(pdev, 4))) {
 		/*
 		 * Map SLI4 if type 0 HBA Doorbell Register base to a kernel
 		 * virtual address and setup the registers.
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index fbab9734e9b4..e6ce9033f85e 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -1736,7 +1736,7 @@ lpfc_sli4_config(struct lpfc_hba *phba, struct lpfcMboxq *mbox,
 	}
 
 	/* Setup for the none-embedded mbox command */
-	pcount = (PAGE_ALIGN(length))/SLI4_PAGE_SIZE;
+	pcount = (SLI4_PAGE_ALIGN(length))/SLI4_PAGE_SIZE;
 	pcount = (pcount > LPFC_SLI4_MBX_SGE_MAX_PAGES) ?
 				LPFC_SLI4_MBX_SGE_MAX_PAGES : pcount;
 	/* Allocate record for keeping SGE virtual addresses */
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index fe7cc84e773b..84e4481b2406 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3238,9 +3238,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	if (!lpfc_cmd) {
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			 "2873 SCSI Layer I/O Abort Request IO CMPL Status "
-			 "x%x ID %d "
-			 "LUN %d snum %#lx\n", ret, cmnd->device->id,
-			 cmnd->device->lun, cmnd->serial_number);
+			 "x%x ID %d LUN %d\n",
+			 ret, cmnd->device->id, cmnd->device->lun);
 		return SUCCESS;
 	}
 
@@ -3318,16 +3317,15 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
 				 "0748 abort handler timed out waiting "
 				 "for abort to complete: ret %#x, ID %d, "
-				 "LUN %d, snum %#lx\n",
-				 ret, cmnd->device->id, cmnd->device->lun,
-				 cmnd->serial_number);
+				 "LUN %d\n",
+				 ret, cmnd->device->id, cmnd->device->lun);
 	}
 
  out:
 	lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 			 "0749 SCSI Layer I/O Abort Request Status x%x ID %d "
-			 "LUN %d snum %#lx\n", ret, cmnd->device->id,
-			 cmnd->device->lun, cmnd->serial_number);
+			 "LUN %d\n", ret, cmnd->device->id,
+			 cmnd->device->lun);
 	return ret;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index dacabbe0a586..837d272cb2d6 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -4769,8 +4769,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	else
 		phba->hba_flag &= ~HBA_FIP_SUPPORT;
 
-	if (phba->sli_rev != LPFC_SLI_REV4 ||
-	    !(phba->hba_flag & HBA_FCOE_MODE)) {
+	if (phba->sli_rev != LPFC_SLI_REV4) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 			"0376 READ_REV Error. SLI Level %d "
 			"FCoE enabled %d\n",
@@ -5018,10 +5017,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		lpfc_reg_fcfi(phba, mboxq);
 		mboxq->vport = phba->pport;
 		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
-		if (rc == MBX_SUCCESS)
-			rc = 0;
-		else
+		if (rc != MBX_SUCCESS)
 			goto out_unset_queue;
+		rc = 0;
+		phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi,
+					&mboxq->u.mqe.un.reg_fcfi);
 	}
 	/*
 	 * The port is ready, set the host's link state to LINK_DOWN
@@ -6402,6 +6402,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 	uint32_t els_id = LPFC_ELS_ID_DEFAULT;
 	int numBdes, i;
 	struct ulp_bde64 bde;
+	struct lpfc_nodelist *ndlp;
 
 	fip = phba->hba_flag & HBA_FIP_SUPPORT;
 	/* The fcp commands will set command type */
@@ -6447,6 +6448,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 
 	switch (iocbq->iocb.ulpCommand) {
 	case CMD_ELS_REQUEST64_CR:
+		ndlp = (struct lpfc_nodelist *)iocbq->context1;
 		if (!iocbq->iocb.ulpLe) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"2007 Only Limited Edition cmd Format"
@@ -6472,6 +6474,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			els_id = ((iocbq->iocb_flag & LPFC_FIP_ELS_ID_MASK)
 					>> LPFC_FIP_ELS_ID_SHIFT);
 		}
+		bf_set(wqe_temp_rpi, &wqe->els_req.wqe_com, ndlp->nlp_rpi);
 		bf_set(wqe_els_id, &wqe->els_req.wqe_com, els_id);
 		bf_set(wqe_dbde, &wqe->els_req.wqe_com, 1);
 		bf_set(wqe_iod, &wqe->els_req.wqe_com, LPFC_WQE_IOD_READ);
@@ -6604,6 +6607,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		command_type = OTHER_COMMAND;
 	break;
 	case CMD_XMIT_ELS_RSP64_CX:
+		ndlp = (struct lpfc_nodelist *)iocbq->context1;
 		/* words0-2 BDE memcpy */
 		/* word3 iocb=iotag32 wqe=response_payload_len */
 		wqe->xmit_els_rsp.response_payload_len = xmit_len;
@@ -6626,6 +6630,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		bf_set(wqe_lenloc, &wqe->xmit_els_rsp.wqe_com,
 		       LPFC_WQE_LENLOC_WORD3);
 		bf_set(wqe_ebde_cnt, &wqe->xmit_els_rsp.wqe_com, 0);
+		bf_set(wqe_rsp_temp_rpi, &wqe->xmit_els_rsp, ndlp->nlp_rpi);
 		command_type = OTHER_COMMAND;
 	break;
 	case CMD_CLOSE_XRI_CN:
@@ -10522,8 +10527,8 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	bf_set(lpfc_mbox_hdr_version, &shdr->request,
 	       phba->sli4_hba.pc_sli4_params.cqv);
 	if (phba->sli4_hba.pc_sli4_params.cqv == LPFC_Q_CREATE_VERSION_2) {
-		bf_set(lpfc_mbx_cq_create_page_size, &cq_create->u.request,
-		       (PAGE_SIZE/SLI4_PAGE_SIZE));
+		/* FW only supports 1. Should be PAGE_SIZE/SLI4_PAGE_SIZE */
+		bf_set(lpfc_mbx_cq_create_page_size, &cq_create->u.request, 1);
 		bf_set(lpfc_cq_eq_id_2, &cq_create->u.request.context,
 		       eq->queue_id);
 	} else {
@@ -10967,6 +10972,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 		       &rq_create->u.request.context,
 		       hrq->entry_count);
 		rq_create->u.request.context.buffer_size = LPFC_HDR_BUF_SIZE;
+		bf_set(lpfc_rq_context_rqe_size,
+		       &rq_create->u.request.context,
+		       LPFC_RQE_SIZE_8);
+		bf_set(lpfc_rq_context_page_size,
+		       &rq_create->u.request.context,
+		       (PAGE_SIZE/SLI4_PAGE_SIZE));
 	} else {
 		switch (hrq->entry_count) {
 		default:
@@ -11042,9 +11053,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	       phba->sli4_hba.pc_sli4_params.rqv);
 	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
 		bf_set(lpfc_rq_context_rqe_count_1,
-		       &rq_create->u.request.context,
-		       hrq->entry_count);
+		       &rq_create->u.request.context, hrq->entry_count);
 		rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE;
+		bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context,
+		       LPFC_RQE_SIZE_8);
+		bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context,
+		       (PAGE_SIZE/SLI4_PAGE_SIZE));
 	} else {
 		switch (drq->entry_count) {
 		default:
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 2404d1d65563..c03921b1232c 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.22"
+#define LPFC_DRIVER_VERSION "8.3.23"
 #define LPFC_DRIVER_NAME		"lpfc"
 #define LPFC_SP_DRIVER_HANDLER_NAME	"lpfc:sp"
 #define LPFC_FP_DRIVER_HANDLER_NAME	"lpfc:fp"
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index f2684dd09ed0..5c1776406c96 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -1469,8 +1469,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
 			if( scb->state & SCB_ABORT ) {
 
 				printk(KERN_WARNING
-				"megaraid: aborted cmd %lx[%x] complete.\n",
-					scb->cmd->serial_number, scb->idx);
+				"megaraid: aborted cmd [%x] complete.\n",
+					scb->idx);
 
 				scb->cmd->result = (DID_ABORT << 16);
 
@@ -1488,8 +1488,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
 			if( scb->state & SCB_RESET ) {
 
 				printk(KERN_WARNING
-				"megaraid: reset cmd %lx[%x] complete.\n",
-					scb->cmd->serial_number, scb->idx);
+				"megaraid: reset cmd [%x] complete.\n",
+					scb->idx);
 
 				scb->cmd->result = (DID_RESET << 16);
 
@@ -1958,8 +1958,8 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor)
 	struct list_head	*pos, *next;
 	scb_t			*scb;
 
-	printk(KERN_WARNING "megaraid: %s-%lx cmd=%x <c=%d t=%d l=%d>\n",
-	     (aor == SCB_ABORT)? "ABORTING":"RESET", cmd->serial_number,
+	printk(KERN_WARNING "megaraid: %s cmd=%x <c=%d t=%d l=%d>\n",
+	     (aor == SCB_ABORT)? "ABORTING":"RESET",
 	     cmd->cmnd[0], cmd->device->channel, 
 	     cmd->device->id, cmd->device->lun);
 
@@ -1983,9 +1983,9 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor)
 			if( scb->state & SCB_ISSUED ) {
 
 				printk(KERN_WARNING
-					"megaraid: %s-%lx[%x], fw owner.\n",
+					"megaraid: %s[%x], fw owner.\n",
 					(aor==SCB_ABORT) ? "ABORTING":"RESET",
-					cmd->serial_number, scb->idx);
+					scb->idx);
 
 				return FALSE;
 			}
@@ -1996,9 +1996,9 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor)
 				 * list
 				 */
 				printk(KERN_WARNING
-					"megaraid: %s-%lx[%x], driver owner.\n",
+					"megaraid: %s[%x], driver owner.\n",
 					(aor==SCB_ABORT) ? "ABORTING":"RESET",
-					cmd->serial_number, scb->idx);
+					scb->idx);
 
 				mega_free_scb(adapter, scb);
 
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 1dba32870b4c..2e6619eff3ea 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -2315,8 +2315,8 @@ megaraid_mbox_dpc(unsigned long devp)
 		// Was an abort issued for this command earlier
 		if (scb->state & SCB_ABORT) {
 			con_log(CL_ANN, (KERN_NOTICE
-			"megaraid: aborted cmd %lx[%x] completed\n",
-				scp->serial_number, scb->sno));
+			"megaraid: aborted cmd [%x] completed\n",
+				scb->sno));
 		}
 
 		/*
@@ -2472,8 +2472,8 @@ megaraid_abort_handler(struct scsi_cmnd *scp)
 	raid_dev	= ADAP2RAIDDEV(adapter);
 
 	con_log(CL_ANN, (KERN_WARNING
-		"megaraid: aborting-%ld cmd=%x <c=%d t=%d l=%d>\n",
-		scp->serial_number, scp->cmnd[0], SCP2CHANNEL(scp),
+		"megaraid: aborting cmd=%x <c=%d t=%d l=%d>\n",
+		scp->cmnd[0], SCP2CHANNEL(scp),
 		SCP2TARGET(scp), SCP2LUN(scp)));
 
 	// If FW has stopped responding, simply return failure
@@ -2496,9 +2496,8 @@ megaraid_abort_handler(struct scsi_cmnd *scp)
 			list_del_init(&scb->list);	// from completed list
 
 			con_log(CL_ANN, (KERN_WARNING
-			"megaraid: %ld:%d[%d:%d], abort from completed list\n",
-				scp->serial_number, scb->sno,
-				scb->dev_channel, scb->dev_target));
+			"megaraid: %d[%d:%d], abort from completed list\n",
+				scb->sno, scb->dev_channel, scb->dev_target));
 
 			scp->result = (DID_ABORT << 16);
 			scp->scsi_done(scp);
@@ -2527,9 +2526,8 @@ megaraid_abort_handler(struct scsi_cmnd *scp)
 			ASSERT(!(scb->state & SCB_ISSUED));
 
 			con_log(CL_ANN, (KERN_WARNING
-				"megaraid abort: %ld[%d:%d], driver owner\n",
-				scp->serial_number, scb->dev_channel,
-				scb->dev_target));
+				"megaraid abort: [%d:%d], driver owner\n",
+				scb->dev_channel, scb->dev_target));
 
 			scp->result = (DID_ABORT << 16);
 			scp->scsi_done(scp);
@@ -2560,25 +2558,21 @@ megaraid_abort_handler(struct scsi_cmnd *scp)
 
 			if (!(scb->state & SCB_ISSUED)) {
 				con_log(CL_ANN, (KERN_WARNING
-				"megaraid abort: %ld%d[%d:%d], invalid state\n",
-				scp->serial_number, scb->sno, scb->dev_channel,
-				scb->dev_target));
+				"megaraid abort: %d[%d:%d], invalid state\n",
+				scb->sno, scb->dev_channel, scb->dev_target));
 				BUG();
 			}
 			else {
 				con_log(CL_ANN, (KERN_WARNING
-				"megaraid abort: %ld:%d[%d:%d], fw owner\n",
-				scp->serial_number, scb->sno, scb->dev_channel,
-				scb->dev_target));
+				"megaraid abort: %d[%d:%d], fw owner\n",
+				scb->sno, scb->dev_channel, scb->dev_target));
 			}
 		}
 	}
 	spin_unlock_irq(&adapter->lock);
 
 	if (!found) {
-		con_log(CL_ANN, (KERN_WARNING
-			"megaraid abort: scsi cmd:%ld, do now own\n",
-			scp->serial_number));
+		con_log(CL_ANN, (KERN_WARNING "megaraid abort: do not own\n"));
 
 		// FIXME: Should there be a callback for this command?
 		return SUCCESS;
@@ -2649,9 +2643,8 @@ megaraid_reset_handler(struct scsi_cmnd *scp)
 		} else {
 			if (scb->scp == scp) {	// Found command
 				con_log(CL_ANN, (KERN_WARNING
-					"megaraid: %ld:%d[%d:%d], reset from pending list\n",
-					scp->serial_number, scb->sno,
-					scb->dev_channel, scb->dev_target));
+					"megaraid: %d[%d:%d], reset from pending list\n",
+					scb->sno, scb->dev_channel, scb->dev_target));
 			} else {
 				con_log(CL_ANN, (KERN_WARNING
 				"megaraid: IO packet with %d[%d:%d] being reset\n",
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 66d4cea4df98..89c623ebadbc 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1751,10 +1751,9 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance)
 			list_del_init(&reset_cmd->list);
 			if (reset_cmd->scmd) {
 				reset_cmd->scmd->result = DID_RESET << 16;
-				printk(KERN_NOTICE "%d:%p reset [%02x], %#lx\n",
+				printk(KERN_NOTICE "%d:%p reset [%02x]\n",
 					reset_index, reset_cmd,
-					reset_cmd->scmd->cmnd[0],
-					reset_cmd->scmd->serial_number);
+					reset_cmd->scmd->cmnd[0]);
 
 				reset_cmd->scmd->scsi_done(reset_cmd->scmd);
 				megasas_return_cmd(instance, reset_cmd);
@@ -1879,8 +1878,8 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd)
 
 	instance = (struct megasas_instance *)scmd->device->host->hostdata;
 
-	scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x retries=%x\n",
-		 scmd->serial_number, scmd->cmnd[0], scmd->retries);
+	scmd_printk(KERN_NOTICE, scmd, "megasas: RESET cmd=%x retries=%x\n",
+		 scmd->cmnd[0], scmd->retries);
 
 	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
 		printk(KERN_ERR "megasas: cannot recover from previous reset "
@@ -2349,9 +2348,9 @@ megasas_issue_pending_cmds_again(struct megasas_instance *instance)
 							cmd->frame_phys_addr ,
 							0, instance->reg_set);
 		} else if (cmd->scmd) {
-			printk(KERN_NOTICE "megasas: %p scsi cmd [%02x],%#lx"
+			printk(KERN_NOTICE "megasas: %p scsi cmd [%02x]"
 			"detected on the internal queue, issue again.\n",
-			cmd, cmd->scmd->cmnd[0], cmd->scmd->serial_number);
+			cmd, cmd->scmd->cmnd[0]);
 
 			atomic_inc(&instance->fw_outstanding);
 			instance->instancet->fire_cmd(instance,
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 197aa1b3f0f3..494474779532 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -415,8 +415,7 @@ static void mesh_start_cmd(struct mesh_state *ms, struct scsi_cmnd *cmd)
 #if 1
 	if (DEBUG_TARGET(cmd)) {
 		int i;
-		printk(KERN_DEBUG "mesh_start: %p ser=%lu tgt=%d cmd=",
-		       cmd, cmd->serial_number, id);
+		printk(KERN_DEBUG "mesh_start: %p tgt=%d cmd=", cmd, id);
 		for (i = 0; i < cmd->cmd_len; ++i)
 			printk(" %x", cmd->cmnd[i]);
 		printk(" use_sg=%d buffer=%p bufflen=%u\n",
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 3346357031e9..efa0255491c2 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -522,7 +522,8 @@ _base_display_event_data(struct MPT2SAS_ADAPTER *ioc,
 		desc = "Device Status Change";
 		break;
 	case MPI2_EVENT_IR_OPERATION_STATUS:
-		desc = "IR Operation Status";
+		if (!ioc->hide_ir_msg)
+			desc = "IR Operation Status";
 		break;
 	case MPI2_EVENT_SAS_DISCOVERY:
 	{
@@ -553,16 +554,20 @@ _base_display_event_data(struct MPT2SAS_ADAPTER *ioc,
 		desc = "SAS Enclosure Device Status Change";
 		break;
 	case MPI2_EVENT_IR_VOLUME:
-		desc = "IR Volume";
+		if (!ioc->hide_ir_msg)
+			desc = "IR Volume";
 		break;
 	case MPI2_EVENT_IR_PHYSICAL_DISK:
-		desc = "IR Physical Disk";
+		if (!ioc->hide_ir_msg)
+			desc = "IR Physical Disk";
 		break;
 	case MPI2_EVENT_IR_CONFIGURATION_CHANGE_LIST:
-		desc = "IR Configuration Change List";
+		if (!ioc->hide_ir_msg)
+			desc = "IR Configuration Change List";
 		break;
 	case MPI2_EVENT_LOG_ENTRY_ADDED:
-		desc = "Log Entry Added";
+		if (!ioc->hide_ir_msg)
+			desc = "Log Entry Added";
 		break;
 	}
 
@@ -616,7 +621,10 @@ _base_sas_log_info(struct MPT2SAS_ADAPTER *ioc , u32 log_info)
 		originator_str = "PL";
 		break;
 	case 2:
-		originator_str = "IR";
+		if (!ioc->hide_ir_msg)
+			originator_str = "IR";
+		else
+			originator_str = "WarpDrive";
 		break;
 	}
 
@@ -1508,6 +1516,7 @@ mpt2sas_base_free_smid(struct MPT2SAS_ADAPTER *ioc, u16 smid)
 		}
 		ioc->scsi_lookup[i].cb_idx = 0xFF;
 		ioc->scsi_lookup[i].scmd = NULL;
+		ioc->scsi_lookup[i].direct_io = 0;
 		list_add_tail(&ioc->scsi_lookup[i].tracker_list,
 		    &ioc->free_list);
 		spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
@@ -1844,10 +1853,12 @@ _base_display_ioc_capabilities(struct MPT2SAS_ADAPTER *ioc)
 	printk("), ");
 	printk("Capabilities=(");
 
-	if (ioc->facts.IOCCapabilities &
-	    MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) {
-		printk("Raid");
-		i++;
+	if (!ioc->hide_ir_msg) {
+		if (ioc->facts.IOCCapabilities &
+		    MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) {
+			printk("Raid");
+			i++;
+		}
 	}
 
 	if (ioc->facts.IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) {
@@ -3680,6 +3691,7 @@ _base_make_ioc_operational(struct MPT2SAS_ADAPTER *ioc, int sleep_flag)
 	u32 reply_address;
 	u16 smid;
 	struct _tr_list *delayed_tr, *delayed_tr_next;
+	u8 hide_flag;
 
 	dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
 	    __func__));
@@ -3706,6 +3718,7 @@ _base_make_ioc_operational(struct MPT2SAS_ADAPTER *ioc, int sleep_flag)
 		ioc->scsi_lookup[i].cb_idx = 0xFF;
 		ioc->scsi_lookup[i].smid = smid;
 		ioc->scsi_lookup[i].scmd = NULL;
+		ioc->scsi_lookup[i].direct_io = 0;
 		list_add_tail(&ioc->scsi_lookup[i].tracker_list,
 		    &ioc->free_list);
 	}
@@ -3766,6 +3779,15 @@ _base_make_ioc_operational(struct MPT2SAS_ADAPTER *ioc, int sleep_flag)
 	if (sleep_flag == CAN_SLEEP)
 		_base_static_config_pages(ioc);
 
+	if (ioc->wait_for_port_enable_to_complete && ioc->is_warpdrive) {
+		if (ioc->manu_pg10.OEMIdentifier  == 0x80) {
+			hide_flag = (u8) (ioc->manu_pg10.OEMSpecificFlags0 &
+			    MFG_PAGE10_HIDE_SSDS_MASK);
+			if (hide_flag != MFG_PAGE10_HIDE_SSDS_MASK)
+				ioc->mfg_pg10_hide_flag = hide_flag;
+		}
+	}
+
 	if (ioc->wait_for_port_enable_to_complete) {
 		if (diag_buffer_enable != 0)
 			mpt2sas_enable_diag_buffer(ioc, diag_buffer_enable);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 500328245f61..2a3c05f6db8b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -69,11 +69,11 @@
 #define MPT2SAS_DRIVER_NAME		"mpt2sas"
 #define MPT2SAS_AUTHOR	"LSI Corporation <DL-MPTFusionLinux@lsi.com>"
 #define MPT2SAS_DESCRIPTION	"LSI MPT Fusion SAS 2.0 Device Driver"
-#define MPT2SAS_DRIVER_VERSION		"08.100.00.00"
+#define MPT2SAS_DRIVER_VERSION		"08.100.00.01"
 #define MPT2SAS_MAJOR_VERSION		08
 #define MPT2SAS_MINOR_VERSION		100
 #define MPT2SAS_BUILD_VERSION		00
-#define MPT2SAS_RELEASE_VERSION		00
+#define MPT2SAS_RELEASE_VERSION		01
 
 /*
  * Set MPT2SAS_SG_DEPTH value based on user input.
@@ -189,6 +189,16 @@
 #define MPT2SAS_HP_DAUGHTER_2_4_INTERNAL_SSDID        0x0046
 
 /*
+ *  WarpDrive Specific Log codes
+ */
+
+#define MPT2_WARPDRIVE_LOGENTRY		(0x8002)
+#define MPT2_WARPDRIVE_LC_SSDT		(0x41)
+#define MPT2_WARPDRIVE_LC_SSDLW		(0x43)
+#define MPT2_WARPDRIVE_LC_SSDLF		(0x44)
+#define MPT2_WARPDRIVE_LC_BRMF		(0x4D)
+
+/*
  * per target private data
  */
 #define MPT_TARGET_FLAGS_RAID_COMPONENT	0x01
@@ -199,6 +209,7 @@
  * struct MPT2SAS_TARGET - starget private hostdata
  * @starget: starget object
  * @sas_address: target sas address
+ * @raid_device: raid_device pointer to access volume data
  * @handle: device handle
  * @num_luns: number luns
  * @flags: MPT_TARGET_FLAGS_XXX flags
@@ -208,6 +219,7 @@
 struct MPT2SAS_TARGET {
 	struct scsi_target *starget;
 	u64	sas_address;
+	struct _raid_device *raid_device;
 	u16	handle;
 	int	num_luns;
 	u32	flags;
@@ -215,6 +227,7 @@ struct MPT2SAS_TARGET {
 	u8	tm_busy;
 };
 
+
 /*
  * per device private data
  */
@@ -262,6 +275,12 @@ typedef struct _MPI2_CONFIG_PAGE_MAN_10 {
   MPI2_POINTER PTR_MPI2_CONFIG_PAGE_MAN_10,
   Mpi2ManufacturingPage10_t, MPI2_POINTER pMpi2ManufacturingPage10_t;
 
+#define MFG_PAGE10_HIDE_SSDS_MASK	(0x00000003)
+#define MFG_PAGE10_HIDE_ALL_DISKS	(0x00)
+#define MFG_PAGE10_EXPOSE_ALL_DISKS	(0x01)
+#define MFG_PAGE10_HIDE_IF_VOL_PRESENT	(0x02)
+
+
 struct MPT2SAS_DEVICE {
 	struct MPT2SAS_TARGET *sas_target;
 	unsigned int	lun;
@@ -341,6 +360,7 @@ struct _sas_device {
  * @sdev: scsi device struct (volumes are single lun)
  * @wwid: unique identifier for the volume
  * @handle: device handle
+ * @block_sz: Block size of the volume
  * @id: target id
  * @channel: target channel
  * @volume_type: the raid level
@@ -348,20 +368,33 @@ struct _sas_device {
  * @num_pds: number of hidden raid components
  * @responding: used in _scsih_raid_device_mark_responding
  * @percent_complete: resync percent complete
+ * @direct_io_enabled: Whether direct io to PDs are allowed or not
+ * @stripe_exponent: X where 2powX is the stripe sz in blocks
+ * @max_lba: Maximum number of LBA in the volume
+ * @stripe_sz: Stripe Size of the volume
+ * @device_info: Device info of the volume member disk
+ * @pd_handle: Handles of the physical drives for direct I/O (CPU byte order)
  */
+#define MPT_MAX_WARPDRIVE_PDS		8
 struct _raid_device {
 	struct list_head list;
 	struct scsi_target *starget;
 	struct scsi_device *sdev;
 	u64	wwid;
 	u16	handle;
+	u16	block_sz;
 	int	id;
 	int	channel;
 	u8	volume_type;
-	u32	device_info;
 	u8	num_pds;
 	u8	responding;
 	u8	percent_complete;
+	u8	direct_io_enabled;
+	u8	stripe_exponent;
+	u64	max_lba;
+	u32	stripe_sz;
+	u32	device_info;
+	u16	pd_handle[MPT_MAX_WARPDRIVE_PDS];
 };
 
 /**
@@ -470,6 +503,7 @@ struct chain_tracker {
  * @smid: system message id
  * @scmd: scsi request pointer
  * @cb_idx: callback index
+ * @direct_io: To indicate whether I/O is direct (WARPDRIVE)
  * @chain_list: list of chains associated to this IO
  * @tracker_list: list of free request (ioc->free_list)
  */
@@ -477,14 +511,14 @@ struct scsiio_tracker {
 	u16	smid;
 	struct scsi_cmnd *scmd;
 	u8	cb_idx;
+	u8	direct_io;
 	struct list_head chain_list;
 	struct list_head tracker_list;
 };
 
 /**
- * struct request_tracker - misc mf request tracker
+ * struct request_tracker - firmware request tracker
  * @smid: system message id
- * @scmd: scsi request pointer
  * @cb_idx: callback index
  * @tracker_list: list of free request (ioc->free_list)
  */
@@ -832,6 +866,11 @@ struct MPT2SAS_ADAPTER {
 	u32		diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT];
 	u32		ring_buffer_offset;
 	u32		ring_buffer_sz;
+	u8		is_warpdrive;
+	u8		hide_ir_msg;
+	u8		mfg_pg10_hide_flag;
+	u8		hide_drives;
+
 };
 
 typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index 1c6d2b405eef..437c2d94c45a 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -688,6 +688,13 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
 		goto out;
 	}
 
+	/* Check for overflow and wraparound */
+	if (karg.data_sge_offset * 4 > ioc->request_sz ||
+	    karg.data_sge_offset > (UINT_MAX / 4)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/* copy in request message frame from user */
 	if (copy_from_user(mpi_request, mf, karg.data_sge_offset*4)) {
 		printk(KERN_ERR "failure at %s:%d/%s()!\n", __FILE__, __LINE__,
@@ -1034,7 +1041,10 @@ _ctl_getiocinfo(void __user *arg)
 	    __func__));
 
 	memset(&karg, 0 , sizeof(karg));
-	karg.adapter_type = MPT2_IOCTL_INTERFACE_SAS2;
+	if (ioc->is_warpdrive)
+		karg.adapter_type = MPT2_IOCTL_INTERFACE_SAS2_SSS6200;
+	else
+		karg.adapter_type = MPT2_IOCTL_INTERFACE_SAS2;
 	if (ioc->pfacts)
 		karg.port_number = ioc->pfacts[0].PortNumber;
 	pci_read_config_byte(ioc->pdev, PCI_CLASS_REVISION, &revision);
@@ -1963,7 +1973,7 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 	Mpi2DiagBufferPostReply_t *mpi_reply;
 	int rc, i;
 	u8 buffer_type;
-	unsigned long timeleft;
+	unsigned long timeleft, request_size, copy_size;
 	u16 smid;
 	u16 ioc_status;
 	u8 issue_reset = 0;
@@ -1999,6 +2009,8 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 		return -ENOMEM;
 	}
 
+	request_size = ioc->diag_buffer_sz[buffer_type];
+
 	if ((karg.starting_offset % 4) || (karg.bytes_to_read % 4)) {
 		printk(MPT2SAS_ERR_FMT "%s: either the starting_offset "
 		    "or bytes_to_read are not 4 byte aligned\n", ioc->name,
@@ -2006,13 +2018,23 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 		return -EINVAL;
 	}
 
+	if (karg.starting_offset > request_size)
+		return -EINVAL;
+
 	diag_data = (void *)(request_data + karg.starting_offset);
 	dctlprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: diag_buffer(%p), "
 	    "offset(%d), sz(%d)\n", ioc->name, __func__,
 	    diag_data, karg.starting_offset, karg.bytes_to_read));
 
+	/* Truncate data on requests that are too large */
+	if ((diag_data + karg.bytes_to_read < diag_data) ||
+	    (diag_data + karg.bytes_to_read > request_data + request_size))
+		copy_size = request_size - karg.starting_offset;
+	else
+		copy_size = karg.bytes_to_read;
+
 	if (copy_to_user((void __user *)uarg->diagnostic_data,
-	    diag_data, karg.bytes_to_read)) {
+	    diag_data, copy_size)) {
 		printk(MPT2SAS_ERR_FMT "%s: Unable to write "
 		    "mpt_diag_read_buffer_t data @ %p\n", ioc->name,
 		    __func__, diag_data);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.h b/drivers/scsi/mpt2sas/mpt2sas_ctl.h
index 69916e46e04f..11ff1d5fb8f0 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.h
@@ -133,6 +133,7 @@ struct mpt2_ioctl_pci_info {
 #define MPT2_IOCTL_INTERFACE_FC_IP	(0x02)
 #define MPT2_IOCTL_INTERFACE_SAS	(0x03)
 #define MPT2_IOCTL_INTERFACE_SAS2	(0x04)
+#define MPT2_IOCTL_INTERFACE_SAS2_SSS6200	(0x05)
 #define MPT2_IOCTL_VERSION_LENGTH	(32)
 
 /**
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index d2064a0533ae..f12e02358d6d 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -233,6 +233,9 @@ static struct pci_device_id scsih_pci_table[] = {
 		PCI_ANY_ID, PCI_ANY_ID },
 	{ MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_3,
 		PCI_ANY_ID, PCI_ANY_ID },
+	/* SSS6200 */
+	{ MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SSS6200,
+		PCI_ANY_ID, PCI_ANY_ID },
 	{0}	/* Terminating entry */
 };
 MODULE_DEVICE_TABLE(pci, scsih_pci_table);
@@ -1256,6 +1259,7 @@ _scsih_target_alloc(struct scsi_target *starget)
 			sas_target_priv_data->handle = raid_device->handle;
 			sas_target_priv_data->sas_address = raid_device->wwid;
 			sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME;
+			sas_target_priv_data->raid_device = raid_device;
 			raid_device->starget = starget;
 		}
 		spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
@@ -1455,7 +1459,10 @@ static int
 _scsih_is_raid(struct device *dev)
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(sdev->host);
 
+	if (ioc->is_warpdrive)
+		return 0;
 	return (sdev->channel == RAID_CHANNEL) ? 1 : 0;
 }
 
@@ -1480,7 +1487,7 @@ _scsih_get_resync(struct device *dev)
 	    sdev->channel);
 	spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
 
-	if (!raid_device)
+	if (!raid_device || ioc->is_warpdrive)
 		goto out;
 
 	if (mpt2sas_config_get_raid_volume_pg0(ioc, &mpi_reply, &vol_pg0,
@@ -1640,6 +1647,212 @@ _scsih_get_volume_capabilities(struct MPT2SAS_ADAPTER *ioc,
 
 	kfree(vol_pg0);
 }
+/**
+ * _scsih_disable_ddio - Disable direct I/O for all the volumes
+ * @ioc: per adapter object
+ */
+static void
+_scsih_disable_ddio(struct MPT2SAS_ADAPTER *ioc)
+{
+	Mpi2RaidVolPage1_t vol_pg1;
+	Mpi2ConfigReply_t mpi_reply;
+	struct _raid_device *raid_device;
+	u16 handle;
+	u16 ioc_status;
+
+	handle = 0xFFFF;
+	while (!(mpt2sas_config_get_raid_volume_pg1(ioc, &mpi_reply,
+	    &vol_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) {
+		ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+		    MPI2_IOCSTATUS_MASK;
+		if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE)
+			break;
+		handle = le16_to_cpu(vol_pg1.DevHandle);
+		raid_device = _scsih_raid_device_find_by_handle(ioc, handle);
+		if (raid_device)
+			raid_device->direct_io_enabled = 0;
+	}
+	return;
+}
+
+
+/**
+ * _scsih_get_num_volumes - Get number of volumes in the ioc
+ * @ioc: per adapter object
+ */
+static u8
+_scsih_get_num_volumes(struct MPT2SAS_ADAPTER *ioc)
+{
+	Mpi2RaidVolPage1_t vol_pg1;
+	Mpi2ConfigReply_t mpi_reply;
+	u16 handle;
+	u8 vol_cnt = 0;
+	u16 ioc_status;
+
+	handle = 0xFFFF;
+	while (!(mpt2sas_config_get_raid_volume_pg1(ioc, &mpi_reply,
+	    &vol_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) {
+		ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+		    MPI2_IOCSTATUS_MASK;
+		if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE)
+			break;
+		vol_cnt++;
+		handle = le16_to_cpu(vol_pg1.DevHandle);
+	}
+	return vol_cnt;
+}
+
+
+/**
+ * _scsih_init_warpdrive_properties - Set properties for warpdrive direct I/O.
+ * @ioc: per adapter object
+ * @raid_device: the raid_device object
+ */
+static void
+_scsih_init_warpdrive_properties(struct MPT2SAS_ADAPTER *ioc,
+	struct _raid_device *raid_device)
+{
+	Mpi2RaidVolPage0_t *vol_pg0;
+	Mpi2RaidPhysDiskPage0_t pd_pg0;
+	Mpi2ConfigReply_t mpi_reply;
+	u16 sz;
+	u8 num_pds, count;
+	u64 mb = 1024 * 1024;
+	u64 tb_2 = 2 * mb * mb;
+	u64 capacity;
+	u32 stripe_sz;
+	u8 i, stripe_exp;
+
+	if (!ioc->is_warpdrive)
+		return;
+
+	if (ioc->mfg_pg10_hide_flag ==  MFG_PAGE10_EXPOSE_ALL_DISKS) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "globally as drives are exposed\n", ioc->name);
+		return;
+	}
+	if (_scsih_get_num_volumes(ioc) > 1) {
+		_scsih_disable_ddio(ioc);
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "globally as number of drives > 1\n", ioc->name);
+		return;
+	}
+	if ((mpt2sas_config_get_number_pds(ioc, raid_device->handle,
+	    &num_pds)) || !num_pds) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "Failure in computing number of drives\n", ioc->name);
+		return;
+	}
+
+	sz = offsetof(Mpi2RaidVolPage0_t, PhysDisk) + (num_pds *
+	    sizeof(Mpi2RaidVol0PhysDisk_t));
+	vol_pg0 = kzalloc(sz, GFP_KERNEL);
+	if (!vol_pg0) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "Memory allocation failure for RVPG0\n", ioc->name);
+		return;
+	}
+
+	if ((mpt2sas_config_get_raid_volume_pg0(ioc, &mpi_reply, vol_pg0,
+	     MPI2_RAID_VOLUME_PGAD_FORM_HANDLE, raid_device->handle, sz))) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "Failure in retrieving RVPG0\n", ioc->name);
+		kfree(vol_pg0);
+		return;
+	}
+
+	/*
+	 * WARPDRIVE:If number of physical disks in a volume exceeds the max pds
+	 * assumed for WARPDRIVE, disable direct I/O
+	 */
+	if (num_pds > MPT_MAX_WARPDRIVE_PDS) {
+		printk(MPT2SAS_WARN_FMT "WarpDrive : Direct IO is disabled "
+		    "for the drive with handle(0x%04x): num_mem=%d, "
+		    "max_mem_allowed=%d\n", ioc->name, raid_device->handle,
+		    num_pds, MPT_MAX_WARPDRIVE_PDS);
+		kfree(vol_pg0);
+		return;
+	}
+	for (count = 0; count < num_pds; count++) {
+		if (mpt2sas_config_get_phys_disk_pg0(ioc, &mpi_reply,
+		    &pd_pg0, MPI2_PHYSDISK_PGAD_FORM_PHYSDISKNUM,
+		    vol_pg0->PhysDisk[count].PhysDiskNum) ||
+		    pd_pg0.DevHandle == MPT2SAS_INVALID_DEVICE_HANDLE) {
+			printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is "
+			    "disabled for the drive with handle(0x%04x) member"
+			    "handle retrieval failed for member number=%d\n",
+			    ioc->name, raid_device->handle,
+			    vol_pg0->PhysDisk[count].PhysDiskNum);
+			goto out_error;
+		}
+		raid_device->pd_handle[count] = le16_to_cpu(pd_pg0.DevHandle);
+	}
+
+	/*
+	 * Assumption for WD: Direct I/O is not supported if the volume is
+	 * not RAID0, if the stripe size is not 64KB, if the block size is
+	 * not 512 and if the volume size is >2TB
+	 */
+	if (raid_device->volume_type != MPI2_RAID_VOL_TYPE_RAID0 ||
+	    le16_to_cpu(vol_pg0->BlockSize) != 512) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "for the drive with handle(0x%04x): type=%d, "
+		    "s_sz=%uK, blk_size=%u\n", ioc->name,
+		    raid_device->handle, raid_device->volume_type,
+		    le32_to_cpu(vol_pg0->StripeSize)/2,
+		    le16_to_cpu(vol_pg0->BlockSize));
+		goto out_error;
+	}
+
+	capacity = (u64) le16_to_cpu(vol_pg0->BlockSize) *
+	    (le64_to_cpu(vol_pg0->MaxLBA) + 1);
+
+	if (capacity > tb_2) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		"for the drive with handle(0x%04x) since drive sz > 2TB\n",
+		ioc->name, raid_device->handle);
+		goto out_error;
+	}
+
+	stripe_sz = le32_to_cpu(vol_pg0->StripeSize);
+	stripe_exp = 0;
+	for (i = 0; i < 32; i++) {
+		if (stripe_sz & 1)
+			break;
+		stripe_exp++;
+		stripe_sz >>= 1;
+	}
+	if (i == 32) {
+		printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is disabled "
+		    "for the drive with handle(0x%04x) invalid stripe sz %uK\n",
+		    ioc->name, raid_device->handle,
+		    le32_to_cpu(vol_pg0->StripeSize)/2);
+		goto out_error;
+	}
+	raid_device->stripe_exponent = stripe_exp;
+	raid_device->direct_io_enabled = 1;
+
+	printk(MPT2SAS_INFO_FMT "WarpDrive : Direct IO is Enabled for the drive"
+	    " with handle(0x%04x)\n", ioc->name, raid_device->handle);
+	/*
+	 * WARPDRIVE: Though the following fields are not used for direct IO,
+	 * stored for future purpose:
+	 */
+	raid_device->max_lba = le64_to_cpu(vol_pg0->MaxLBA);
+	raid_device->stripe_sz = le32_to_cpu(vol_pg0->StripeSize);
+	raid_device->block_sz = le16_to_cpu(vol_pg0->BlockSize);
+
+
+	kfree(vol_pg0);
+	return;
+
+out_error:
+	raid_device->direct_io_enabled = 0;
+	for (count = 0; count < num_pds; count++)
+		raid_device->pd_handle[count] = 0;
+	kfree(vol_pg0);
+	return;
+}
 
 /**
  * _scsih_enable_tlr - setting TLR flags
@@ -1710,6 +1923,11 @@ _scsih_slave_configure(struct scsi_device *sdev)
 
 		_scsih_get_volume_capabilities(ioc, raid_device);
 
+		/*
+		 * WARPDRIVE: Initialize the required data for Direct IO
+		 */
+		_scsih_init_warpdrive_properties(ioc, raid_device);
+
 		/* RAID Queue Depth Support
 		 * IS volume = underlying qdepth of drive type, either
 		 *    MPT2SAS_SAS_QUEUE_DEPTH or MPT2SAS_SATA_QUEUE_DEPTH
@@ -1757,14 +1975,16 @@ _scsih_slave_configure(struct scsi_device *sdev)
 			break;
 		}
 
-		sdev_printk(KERN_INFO, sdev, "%s: "
-		    "handle(0x%04x), wwid(0x%016llx), pd_count(%d), type(%s)\n",
-		    r_level, raid_device->handle,
-		    (unsigned long long)raid_device->wwid,
-		    raid_device->num_pds, ds);
+		if (!ioc->hide_ir_msg)
+			sdev_printk(KERN_INFO, sdev, "%s: handle(0x%04x), "
+			    "wwid(0x%016llx), pd_count(%d), type(%s)\n",
+			    r_level, raid_device->handle,
+			    (unsigned long long)raid_device->wwid,
+			    raid_device->num_pds, ds);
 		_scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT);
 		/* raid transport support */
-		_scsih_set_level(sdev, raid_device);
+		if (!ioc->is_warpdrive)
+			_scsih_set_level(sdev, raid_device);
 		return 0;
 	}
 
@@ -2133,8 +2353,7 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel,
 	switch (type) {
 	case MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK:
 		scmd_lookup = _scsih_scsi_lookup_get(ioc, smid_task);
-		if (scmd_lookup && (scmd_lookup->serial_number ==
-		    scmd->serial_number))
+		if (scmd_lookup)
 			rc = FAILED;
 		else
 			rc = SUCCESS;
@@ -2182,16 +2401,20 @@ _scsih_tm_display_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd)
 	struct MPT2SAS_TARGET *priv_target = starget->hostdata;
 	struct _sas_device *sas_device = NULL;
 	unsigned long flags;
+	char *device_str = NULL;
 
 	if (!priv_target)
 		return;
+	if (ioc->hide_ir_msg)
+		device_str = "WarpDrive";
+	else
+		device_str = "volume";
 
 	scsi_print_command(scmd);
 	if (priv_target->flags & MPT_TARGET_FLAGS_VOLUME) {
-		starget_printk(KERN_INFO, starget, "volume handle(0x%04x), "
-		    "volume wwid(0x%016llx)\n",
-		    priv_target->handle,
-		    (unsigned long long)priv_target->sas_address);
+		starget_printk(KERN_INFO, starget, "%s handle(0x%04x), "
+		    "%s wwid(0x%016llx)\n", device_str, priv_target->handle,
+		    device_str, (unsigned long long)priv_target->sas_address);
 	} else {
 		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 		sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
@@ -3130,6 +3353,9 @@ _scsih_check_ir_config_unhide_events(struct MPT2SAS_ADAPTER *ioc,
 	a = 0;
 	b = 0;
 
+	if (ioc->is_warpdrive)
+		return;
+
 	/* Volume Resets for Deleted or Removed */
 	element = (Mpi2EventIrConfigElement_t *)&event_data->ConfigElement[0];
 	for (i = 0; i < event_data->NumElements; i++, element++) {
@@ -3347,6 +3573,105 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
 }
 
 /**
+ * _scsih_scsi_direct_io_get - returns direct io flag
+ * @ioc: per adapter object
+ * @smid: system request message index
+ *
+ * Returns the direct_io flag stored for the given smid.
+ */
+static inline u8
+_scsih_scsi_direct_io_get(struct MPT2SAS_ADAPTER *ioc, u16 smid)
+{
+	return ioc->scsi_lookup[smid - 1].direct_io;
+}
+
+/**
+ * _scsih_scsi_direct_io_set - sets direct io flag
+ * @ioc: per adapter object
+ * @smid: system request message index
+ * @direct_io: Zero or non-zero value to set in the direct_io flag
+ *
+ * Returns Nothing.
+ */
+static inline void
+_scsih_scsi_direct_io_set(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 direct_io)
+{
+	ioc->scsi_lookup[smid - 1].direct_io = direct_io;
+}
+
+
+/**
+ * _scsih_setup_direct_io - setup MPI request for WARPDRIVE Direct I/O
+ * @ioc: per adapter object
+ * @scmd: pointer to scsi command object
+ * @raid_device: pointer to raid device data structure
+ * @mpi_request: pointer to the SCSI_IO request message frame
+ * @smid: system request message index
+ *
+ * Returns nothing
+ */
+static void
+_scsih_setup_direct_io(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
+	struct _raid_device *raid_device, Mpi2SCSIIORequest_t *mpi_request,
+	u16 smid)
+{
+	u32 v_lba, p_lba, stripe_off, stripe_unit, column, io_size;
+	u32 stripe_sz, stripe_exp;
+	u8 num_pds, *cdb_ptr, *tmp_ptr, *lba_ptr1, *lba_ptr2;
+	u8 cdb0 = scmd->cmnd[0];
+
+	/*
+	 * Try Direct I/O to RAID member disks
+	 */
+	if (cdb0 == READ_16 || cdb0 == READ_10 ||
+	    cdb0 == WRITE_16 || cdb0 == WRITE_10) {
+		cdb_ptr = mpi_request->CDB.CDB32;
+
+		if ((cdb0 < READ_16) || !(cdb_ptr[2] | cdb_ptr[3] | cdb_ptr[4]
+			| cdb_ptr[5])) {
+			io_size = scsi_bufflen(scmd) >> 9;
+			/* get virtual lba */
+			lba_ptr1 = lba_ptr2 = (cdb0 < READ_16) ? &cdb_ptr[2] :
+			    &cdb_ptr[6];
+			tmp_ptr = (u8 *)&v_lba + 3;
+			*tmp_ptr-- = *lba_ptr1++;
+			*tmp_ptr-- = *lba_ptr1++;
+			*tmp_ptr-- = *lba_ptr1++;
+			*tmp_ptr = *lba_ptr1;
+
+			if (((u64)v_lba + (u64)io_size - 1) <=
+			    (u32)raid_device->max_lba) {
+				stripe_sz = raid_device->stripe_sz;
+				stripe_exp = raid_device->stripe_exponent;
+				stripe_off = v_lba & (stripe_sz - 1);
+
+				/* Check whether IO falls within a stripe */
+				if ((stripe_off + io_size) <= stripe_sz) {
+					num_pds = raid_device->num_pds;
+					p_lba = v_lba >> stripe_exp;
+					stripe_unit = p_lba / num_pds;
+					column = p_lba % num_pds;
+					p_lba = (stripe_unit << stripe_exp) +
+					    stripe_off;
+					mpi_request->DevHandle =
+						cpu_to_le16(raid_device->
+						    pd_handle[column]);
+					tmp_ptr = (u8 *)&p_lba + 3;
+					*lba_ptr2++ = *tmp_ptr--;
+					*lba_ptr2++ = *tmp_ptr--;
+					*lba_ptr2++ = *tmp_ptr--;
+					*lba_ptr2 = *tmp_ptr;
+					/*
+					* WD: To indicate this I/O is direct I/O
+					*/
+					_scsih_scsi_direct_io_set(ioc, smid, 1);
+				}
+			}
+		}
+	}
+}
+
+/**
  * _scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
  * @done: function pointer to be invoked on completion
@@ -3363,6 +3688,7 @@ _scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
 	struct MPT2SAS_TARGET *sas_target_priv_data;
+	struct _raid_device *raid_device;
 	Mpi2SCSIIORequest_t *mpi_request;
 	u32 mpi_control;
 	u16 smid;
@@ -3424,8 +3750,10 @@ _scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 
 	} else
 		mpi_control |= MPI2_SCSIIO_CONTROL_SIMPLEQ;
-	/* Make sure Device is not raid volume */
-	if (!_scsih_is_raid(&scmd->device->sdev_gendev) &&
+	/* Make sure Device is not raid volume.
+	 * We do not expose raid functionality to upper layer for warpdrive.
+	 */
+	if (!ioc->is_warpdrive && !_scsih_is_raid(&scmd->device->sdev_gendev) &&
 	    sas_is_tlr_enabled(scmd->device) && scmd->cmd_len != 32)
 		mpi_control |= MPI2_SCSIIO_CONTROL_TLR_ON;
 
@@ -3473,9 +3801,14 @@ _scsih_qcmd_lck(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 		}
 	}
 
+	raid_device = sas_target_priv_data->raid_device;
+	if (raid_device && raid_device->direct_io_enabled)
+		_scsih_setup_direct_io(ioc, scmd, raid_device, mpi_request,
+		    smid);
+
 	if (likely(mpi_request->Function == MPI2_FUNCTION_SCSI_IO_REQUEST))
 		mpt2sas_base_put_smid_scsi_io(ioc, smid,
-		    sas_device_priv_data->sas_target->handle);
+		    le16_to_cpu(mpi_request->DevHandle));
 	else
 		mpt2sas_base_put_smid_default(ioc, smid);
 	return 0;
@@ -3540,10 +3873,16 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 	unsigned long flags;
 	struct scsi_target *starget = scmd->device->sdev_target;
 	struct MPT2SAS_TARGET *priv_target = starget->hostdata;
+	char *device_str = NULL;
 
 	if (!priv_target)
 		return;
 
+	if (ioc->hide_ir_msg)
+		device_str = "WarpDrive";
+	else
+		device_str = "volume";
+
 	if (log_info == 0x31170000)
 		return;
 
@@ -3660,8 +3999,8 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 	scsi_print_command(scmd);
 
 	if (priv_target->flags & MPT_TARGET_FLAGS_VOLUME) {
-		printk(MPT2SAS_WARN_FMT "\tvolume wwid(0x%016llx)\n", ioc->name,
-		    (unsigned long long)priv_target->sas_address);
+		printk(MPT2SAS_WARN_FMT "\t%s wwid(0x%016llx)\n", ioc->name,
+		    device_str, (unsigned long long)priv_target->sas_address);
 	} else {
 		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 		sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
@@ -3840,6 +4179,20 @@ _scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 		scmd->result = DID_NO_CONNECT << 16;
 		goto out;
 	}
+	/*
+	 * WARPDRIVE: If direct_io is set then it is directIO,
+	 * the failed direct I/O should be redirected to volume
+	 */
+	if (_scsih_scsi_direct_io_get(ioc, smid)) {
+		_scsih_scsi_direct_io_set(ioc, smid, 0);
+		memcpy(mpi_request->CDB.CDB32, scmd->cmnd, scmd->cmd_len);
+		mpi_request->DevHandle =
+		    cpu_to_le16(sas_device_priv_data->sas_target->handle);
+		mpt2sas_base_put_smid_scsi_io(ioc, smid,
+		    sas_device_priv_data->sas_target->handle);
+		return 0;
+	}
+
 
 	/* turning off TLR */
 	scsi_state = mpi_reply->SCSIState;
@@ -3848,7 +4201,10 @@ _scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 		    le32_to_cpu(mpi_reply->ResponseInfo) & 0xFF;
 	if (!sas_device_priv_data->tlr_snoop_check) {
 		sas_device_priv_data->tlr_snoop_check++;
-	if (!_scsih_is_raid(&scmd->device->sdev_gendev) &&
+	/* Make sure Device is not raid volume.
+	 * We do not expose raid functionality to upper layer for warpdrive.
+	 */
+	if (!ioc->is_warpdrive && !_scsih_is_raid(&scmd->device->sdev_gendev) &&
 		sas_is_tlr_enabled(scmd->device) &&
 		    response_code == MPI2_SCSITASKMGMT_RSP_INVALID_FRAME) {
 			sas_disable_tlr(scmd->device);
@@ -4681,8 +5037,10 @@ _scsih_remove_device(struct MPT2SAS_ADAPTER *ioc,
 
 	_scsih_ublock_io_device(ioc, sas_device_backup.handle);
 
-	mpt2sas_transport_port_remove(ioc, sas_device_backup.sas_address,
-	    sas_device_backup.sas_address_parent);
+	if (!ioc->hide_drives)
+		mpt2sas_transport_port_remove(ioc,
+		    sas_device_backup.sas_address,
+		    sas_device_backup.sas_address_parent);
 
 	printk(MPT2SAS_INFO_FMT "removing handle(0x%04x), sas_addr"
 	    "(0x%016llx)\n", ioc->name, sas_device_backup.handle,
@@ -5413,6 +5771,7 @@ _scsih_sas_pd_hide(struct MPT2SAS_ADAPTER *ioc,
 	    &sas_device->volume_wwid);
 	set_bit(handle, ioc->pd_handles);
 	_scsih_reprobe_target(sas_device->starget, 1);
+
 }
 
 /**
@@ -5591,7 +5950,8 @@ _scsih_sas_ir_config_change_event(struct MPT2SAS_ADAPTER *ioc,
 	Mpi2EventDataIrConfigChangeList_t *event_data = fw_event->event_data;
 
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
-	if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+	if ((ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+	    && !ioc->hide_ir_msg)
 		_scsih_sas_ir_config_change_event_debug(ioc, event_data);
 
 #endif
@@ -5614,16 +5974,20 @@ _scsih_sas_ir_config_change_event(struct MPT2SAS_ADAPTER *ioc,
 				    le16_to_cpu(element->VolDevHandle));
 			break;
 		case MPI2_EVENT_IR_CHANGE_RC_PD_CREATED:
-			_scsih_sas_pd_hide(ioc, element);
+			if (!ioc->is_warpdrive)
+				_scsih_sas_pd_hide(ioc, element);
 			break;
 		case MPI2_EVENT_IR_CHANGE_RC_PD_DELETED:
-			_scsih_sas_pd_expose(ioc, element);
+			if (!ioc->is_warpdrive)
+				_scsih_sas_pd_expose(ioc, element);
 			break;
 		case MPI2_EVENT_IR_CHANGE_RC_HIDE:
-			_scsih_sas_pd_add(ioc, element);
+			if (!ioc->is_warpdrive)
+				_scsih_sas_pd_add(ioc, element);
 			break;
 		case MPI2_EVENT_IR_CHANGE_RC_UNHIDE:
-			_scsih_sas_pd_delete(ioc, element);
+			if (!ioc->is_warpdrive)
+				_scsih_sas_pd_delete(ioc, element);
 			break;
 		}
 	}
@@ -5654,9 +6018,10 @@ _scsih_sas_ir_volume_event(struct MPT2SAS_ADAPTER *ioc,
 
 	handle = le16_to_cpu(event_data->VolDevHandle);
 	state = le32_to_cpu(event_data->NewValue);
-	dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: handle(0x%04x), "
-	    "old(0x%08x), new(0x%08x)\n", ioc->name, __func__,  handle,
-	    le32_to_cpu(event_data->PreviousValue), state));
+	if (!ioc->hide_ir_msg)
+		dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: handle(0x%04x), "
+		    "old(0x%08x), new(0x%08x)\n", ioc->name, __func__,  handle,
+		    le32_to_cpu(event_data->PreviousValue), state));
 
 	switch (state) {
 	case MPI2_RAID_VOL_STATE_MISSING:
@@ -5736,9 +6101,10 @@ _scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc,
 	handle = le16_to_cpu(event_data->PhysDiskDevHandle);
 	state = le32_to_cpu(event_data->NewValue);
 
-	dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: handle(0x%04x), "
-	    "old(0x%08x), new(0x%08x)\n", ioc->name, __func__,  handle,
-	    le32_to_cpu(event_data->PreviousValue), state));
+	if (!ioc->hide_ir_msg)
+		dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: handle(0x%04x), "
+		    "old(0x%08x), new(0x%08x)\n", ioc->name, __func__,  handle,
+		    le32_to_cpu(event_data->PreviousValue), state));
 
 	switch (state) {
 	case MPI2_RAID_PD_STATE_ONLINE:
@@ -5747,7 +6113,8 @@ _scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc,
 	case MPI2_RAID_PD_STATE_OPTIMAL:
 	case MPI2_RAID_PD_STATE_HOT_SPARE:
 
-		set_bit(handle, ioc->pd_handles);
+		if (!ioc->is_warpdrive)
+			set_bit(handle, ioc->pd_handles);
 
 		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 		sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
@@ -5851,7 +6218,8 @@ _scsih_sas_ir_operation_status_event(struct MPT2SAS_ADAPTER *ioc,
 	u16 handle;
 
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
-	if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+	if ((ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
+	    && !ioc->hide_ir_msg)
 		_scsih_sas_ir_operation_status_event_debug(ioc,
 		     event_data);
 #endif
@@ -5910,7 +6278,7 @@ static void
 _scsih_mark_responding_sas_device(struct MPT2SAS_ADAPTER *ioc, u64 sas_address,
     u16 slot, u16 handle)
 {
-	struct MPT2SAS_TARGET *sas_target_priv_data;
+	struct MPT2SAS_TARGET *sas_target_priv_data = NULL;
 	struct scsi_target *starget;
 	struct _sas_device *sas_device;
 	unsigned long flags;
@@ -5918,7 +6286,7 @@ _scsih_mark_responding_sas_device(struct MPT2SAS_ADAPTER *ioc, u64 sas_address,
 	spin_lock_irqsave(&ioc->sas_device_lock, flags);
 	list_for_each_entry(sas_device, &ioc->sas_device_list, list) {
 		if (sas_device->sas_address == sas_address &&
-		    sas_device->slot == slot && sas_device->starget) {
+		    sas_device->slot == slot) {
 			sas_device->responding = 1;
 			starget = sas_device->starget;
 			if (starget && starget->hostdata) {
@@ -5927,13 +6295,15 @@ _scsih_mark_responding_sas_device(struct MPT2SAS_ADAPTER *ioc, u64 sas_address,
 				sas_target_priv_data->deleted = 0;
 			} else
 				sas_target_priv_data = NULL;
-			starget_printk(KERN_INFO, sas_device->starget,
-			    "handle(0x%04x), sas_addr(0x%016llx), enclosure "
-			    "logical id(0x%016llx), slot(%d)\n", handle,
-			    (unsigned long long)sas_device->sas_address,
-			    (unsigned long long)
-			    sas_device->enclosure_logical_id,
-			    sas_device->slot);
+			if (starget)
+				starget_printk(KERN_INFO, starget,
+				    "handle(0x%04x), sas_addr(0x%016llx), "
+				    "enclosure logical id(0x%016llx), "
+				    "slot(%d)\n", handle,
+				    (unsigned long long)sas_device->sas_address,
+				    (unsigned long long)
+				    sas_device->enclosure_logical_id,
+				    sas_device->slot);
 			if (sas_device->handle == handle)
 				goto out;
 			printk(KERN_INFO "\thandle changed from(0x%04x)!!!\n",
@@ -6025,6 +6395,12 @@ _scsih_mark_responding_raid_device(struct MPT2SAS_ADAPTER *ioc, u64 wwid,
 			starget_printk(KERN_INFO, raid_device->starget,
 			    "handle(0x%04x), wwid(0x%016llx)\n", handle,
 			    (unsigned long long)raid_device->wwid);
+			/*
+			 * WARPDRIVE: The handles of the PDs might have changed
+			 * across the host reset so re-initialize the
+			 * required data for Direct IO
+			 */
+			_scsih_init_warpdrive_properties(ioc, raid_device);
 			if (raid_device->handle == handle)
 				goto out;
 			printk(KERN_INFO "\thandle changed from(0x%04x)!!!\n",
@@ -6086,18 +6462,20 @@ _scsih_search_responding_raid_devices(struct MPT2SAS_ADAPTER *ioc)
 	}
 
 	/* refresh the pd_handles */
-	phys_disk_num = 0xFF;
-	memset(ioc->pd_handles, 0, ioc->pd_handles_sz);
-	while (!(mpt2sas_config_get_phys_disk_pg0(ioc, &mpi_reply,
-	    &pd_pg0, MPI2_PHYSDISK_PGAD_FORM_GET_NEXT_PHYSDISKNUM,
-	    phys_disk_num))) {
-		ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
-		    MPI2_IOCSTATUS_MASK;
-		if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE)
-			break;
-		phys_disk_num = pd_pg0.PhysDiskNum;
-		handle = le16_to_cpu(pd_pg0.DevHandle);
-		set_bit(handle, ioc->pd_handles);
+	if (!ioc->is_warpdrive) {
+		phys_disk_num = 0xFF;
+		memset(ioc->pd_handles, 0, ioc->pd_handles_sz);
+		while (!(mpt2sas_config_get_phys_disk_pg0(ioc, &mpi_reply,
+		    &pd_pg0, MPI2_PHYSDISK_PGAD_FORM_GET_NEXT_PHYSDISKNUM,
+		    phys_disk_num))) {
+			ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+			    MPI2_IOCSTATUS_MASK;
+			if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE)
+				break;
+			phys_disk_num = pd_pg0.PhysDiskNum;
+			handle = le16_to_cpu(pd_pg0.DevHandle);
+			set_bit(handle, ioc->pd_handles);
+		}
 	}
 }
 
@@ -6243,6 +6621,50 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc)
 }
 
 /**
+ * _scsih_hide_unhide_sas_devices - add/remove device to/from OS
+ * @ioc: per adapter object
+ *
+ * Return nothing.
+ */
+static void
+_scsih_hide_unhide_sas_devices(struct MPT2SAS_ADAPTER *ioc)
+{
+	struct _sas_device *sas_device, *sas_device_next;
+
+	if (!ioc->is_warpdrive || ioc->mfg_pg10_hide_flag !=
+	    MFG_PAGE10_HIDE_IF_VOL_PRESENT)
+		return;
+
+	if (ioc->hide_drives) {
+		if (_scsih_get_num_volumes(ioc))
+			return;
+		ioc->hide_drives = 0;
+		list_for_each_entry_safe(sas_device, sas_device_next,
+		    &ioc->sas_device_list, list) {
+			if (!mpt2sas_transport_port_add(ioc, sas_device->handle,
+				sas_device->sas_address_parent)) {
+				_scsih_sas_device_remove(ioc, sas_device);
+			} else if (!sas_device->starget) {
+				mpt2sas_transport_port_remove(ioc,
+				    sas_device->sas_address,
+				    sas_device->sas_address_parent);
+				_scsih_sas_device_remove(ioc, sas_device);
+			}
+		}
+	} else {
+		if (!_scsih_get_num_volumes(ioc))
+			return;
+		ioc->hide_drives = 1;
+		list_for_each_entry_safe(sas_device, sas_device_next,
+		    &ioc->sas_device_list, list) {
+			mpt2sas_transport_port_remove(ioc,
+			    sas_device->sas_address,
+			    sas_device->sas_address_parent);
+		}
+	}
+}
+
+/**
  * mpt2sas_scsih_reset_handler - reset callback handler (for scsih)
  * @ioc: per adapter object
  * @reset_phase: phase
@@ -6326,6 +6748,7 @@ _firmware_event_work(struct work_struct *work)
 			spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock,
 			    flags);
 		_scsih_remove_unresponding_sas_devices(ioc);
+		_scsih_hide_unhide_sas_devices(ioc);
 		return;
 	}
 
@@ -6425,6 +6848,53 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
 		    (Mpi2EventDataIrVolume_t *)
 		    mpi_reply->EventData);
 		break;
+	case MPI2_EVENT_LOG_ENTRY_ADDED:
+	{
+		Mpi2EventDataLogEntryAdded_t *log_entry;
+		u32 *log_code;
+
+		if (!ioc->is_warpdrive)
+			break;
+
+		log_entry = (Mpi2EventDataLogEntryAdded_t *)
+		    mpi_reply->EventData;
+		log_code = (u32 *)log_entry->LogData;
+
+		if (le16_to_cpu(log_entry->LogEntryQualifier)
+		    != MPT2_WARPDRIVE_LOGENTRY)
+			break;
+
+		switch (le32_to_cpu(*log_code)) {
+		case MPT2_WARPDRIVE_LC_SSDT:
+			printk(MPT2SAS_WARN_FMT "WarpDrive Warning: "
+			    "IO Throttling has occurred in the WarpDrive "
+			    "subsystem. Check WarpDrive documentation for "
+			    "additional details.\n", ioc->name);
+			break;
+		case MPT2_WARPDRIVE_LC_SSDLW:
+			printk(MPT2SAS_WARN_FMT "WarpDrive Warning: "
+			    "Program/Erase Cycles for the WarpDrive subsystem "
+			    "in degraded range. Check WarpDrive documentation "
+			    "for additional details.\n", ioc->name);
+			break;
+		case MPT2_WARPDRIVE_LC_SSDLF:
+			printk(MPT2SAS_ERR_FMT "WarpDrive Fatal Error: "
+			    "There are no Program/Erase Cycles for the "
+			    "WarpDrive subsystem. The storage device will be "
+			    "in read-only mode. Check WarpDrive documentation "
+			    "for additional details.\n", ioc->name);
+			break;
+		case MPT2_WARPDRIVE_LC_BRMF:
+			printk(MPT2SAS_ERR_FMT "WarpDrive Fatal Error: "
+			    "The Backup Rail Monitor has failed on the "
+			    "WarpDrive subsystem. Check WarpDrive "
+			    "documentation for additional details.\n",
+			    ioc->name);
+			break;
+		}
+
+		break;
+	}
 	case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
 	case MPI2_EVENT_IR_OPERATION_STATUS:
 	case MPI2_EVENT_SAS_DISCOVERY:
@@ -6583,7 +7053,8 @@ _scsih_ir_shutdown(struct MPT2SAS_ADAPTER *ioc)
 	mpi_request->Function = MPI2_FUNCTION_RAID_ACTION;
 	mpi_request->Action = MPI2_RAID_ACTION_SYSTEM_SHUTDOWN_INITIATED;
 
-	printk(MPT2SAS_INFO_FMT "IR shutdown (sending)\n", ioc->name);
+	if (!ioc->hide_ir_msg)
+		printk(MPT2SAS_INFO_FMT "IR shutdown (sending)\n", ioc->name);
 	init_completion(&ioc->scsih_cmds.done);
 	mpt2sas_base_put_smid_default(ioc, smid);
 	wait_for_completion_timeout(&ioc->scsih_cmds.done, 10*HZ);
@@ -6597,10 +7068,11 @@ _scsih_ir_shutdown(struct MPT2SAS_ADAPTER *ioc)
 	if (ioc->scsih_cmds.status & MPT2_CMD_REPLY_VALID) {
 		mpi_reply = ioc->scsih_cmds.reply;
 
-		printk(MPT2SAS_INFO_FMT "IR shutdown (complete): "
-		    "ioc_status(0x%04x), loginfo(0x%08x)\n",
-		    ioc->name, le16_to_cpu(mpi_reply->IOCStatus),
-		    le32_to_cpu(mpi_reply->IOCLogInfo));
+		if (!ioc->hide_ir_msg)
+			printk(MPT2SAS_INFO_FMT "IR shutdown (complete): "
+			    "ioc_status(0x%04x), loginfo(0x%08x)\n",
+			    ioc->name, le16_to_cpu(mpi_reply->IOCStatus),
+			    le32_to_cpu(mpi_reply->IOCLogInfo));
 	}
 
  out:
@@ -6759,6 +7231,9 @@ _scsih_probe_boot_devices(struct MPT2SAS_ADAPTER *ioc)
 		spin_lock_irqsave(&ioc->sas_device_lock, flags);
 		list_move_tail(&sas_device->list, &ioc->sas_device_list);
 		spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+		if (ioc->hide_drives)
+			return;
 		if (!mpt2sas_transport_port_add(ioc, sas_device->handle,
 		    sas_device->sas_address_parent)) {
 			_scsih_sas_device_remove(ioc, sas_device);
@@ -6812,6 +7287,9 @@ _scsih_probe_sas(struct MPT2SAS_ADAPTER *ioc)
 		list_move_tail(&sas_device->list, &ioc->sas_device_list);
 		spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
 
+		if (ioc->hide_drives)
+			continue;
+
 		if (!mpt2sas_transport_port_add(ioc, sas_device->handle,
 		    sas_device->sas_address_parent)) {
 			_scsih_sas_device_remove(ioc, sas_device);
@@ -6882,6 +7360,11 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	ioc->id = mpt_ids++;
 	sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id);
 	ioc->pdev = pdev;
+	if (id->device == MPI2_MFGPAGE_DEVID_SSS6200) {
+		ioc->is_warpdrive = 1;
+		ioc->hide_ir_msg = 1;
+	} else
+		ioc->mfg_pg10_hide_flag = MFG_PAGE10_EXPOSE_ALL_DISKS;
 	ioc->scsi_io_cb_idx = scsi_io_cb_idx;
 	ioc->tm_cb_idx = tm_cb_idx;
 	ioc->ctl_cb_idx = ctl_cb_idx;
@@ -6947,6 +7430,20 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	ioc->wait_for_port_enable_to_complete = 0;
+	if (ioc->is_warpdrive) {
+		if (ioc->mfg_pg10_hide_flag ==  MFG_PAGE10_EXPOSE_ALL_DISKS)
+			ioc->hide_drives = 0;
+		else if (ioc->mfg_pg10_hide_flag ==  MFG_PAGE10_HIDE_ALL_DISKS)
+			ioc->hide_drives = 1;
+		else {
+			if (_scsih_get_num_volumes(ioc))
+				ioc->hide_drives = 1;
+			else
+				ioc->hide_drives = 0;
+		}
+	} else
+		ioc->hide_drives = 0;
+
 	_scsih_probe_devices(ioc);
 	return 0;
 
diff --git a/drivers/scsi/mvsas/Kconfig b/drivers/scsi/mvsas/Kconfig
index 6de7af27e507..c82b012aba37 100644
--- a/drivers/scsi/mvsas/Kconfig
+++ b/drivers/scsi/mvsas/Kconfig
@@ -3,6 +3,7 @@
 #
 # Copyright 2007 Red Hat, Inc.
 # Copyright 2008 Marvell. <kewei@marvell.com>
+# Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
 #
 # This file is licensed under GPLv2.
 #
diff --git a/drivers/scsi/mvsas/Makefile b/drivers/scsi/mvsas/Makefile
index ffbf759e46f1..87b231a5bd5e 100644
--- a/drivers/scsi/mvsas/Makefile
+++ b/drivers/scsi/mvsas/Makefile
@@ -3,6 +3,7 @@
 #
 # Copyright 2007 Red Hat, Inc.
 # Copyright 2008 Marvell. <kewei@marvell.com>
+# Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
 #
 # This file is licensed under GPLv2.
 #
diff --git a/drivers/scsi/mvsas/mv_64xx.c b/drivers/scsi/mvsas/mv_64xx.c
index afc7f6f3a13e..13c960481391 100644
--- a/drivers/scsi/mvsas/mv_64xx.c
+++ b/drivers/scsi/mvsas/mv_64xx.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
diff --git a/drivers/scsi/mvsas/mv_64xx.h b/drivers/scsi/mvsas/mv_64xx.h
index 42e947d9795e..545889bd9753 100644
--- a/drivers/scsi/mvsas/mv_64xx.h
+++ b/drivers/scsi/mvsas/mv_64xx.h
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c
index eed4c5c72013..78162c3c36e6 100644
--- a/drivers/scsi/mvsas/mv_94xx.c
+++ b/drivers/scsi/mvsas/mv_94xx.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
diff --git a/drivers/scsi/mvsas/mv_94xx.h b/drivers/scsi/mvsas/mv_94xx.h
index 23ed9b164669..8835befe2c0e 100644
--- a/drivers/scsi/mvsas/mv_94xx.h
+++ b/drivers/scsi/mvsas/mv_94xx.h
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
diff --git a/drivers/scsi/mvsas/mv_chips.h b/drivers/scsi/mvsas/mv_chips.h
index a67e1c4172f9..1753a6fc42d0 100644
--- a/drivers/scsi/mvsas/mv_chips.h
+++ b/drivers/scsi/mvsas/mv_chips.h
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
index 1849da1f030d..bc00c940743c 100644
--- a/drivers/scsi/mvsas/mv_defs.h
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
@@ -34,6 +35,8 @@ enum chip_flavors {
 	chip_6485,
 	chip_9480,
 	chip_9180,
+	chip_9445,
+	chip_9485,
 	chip_1300,
 	chip_1320
 };
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 938d045e4180..90b636611cde 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
@@ -25,13 +26,24 @@
 
 #include "mv_sas.h"
 
+static int lldd_max_execute_num = 1;
+module_param_named(collector, lldd_max_execute_num, int, S_IRUGO);
+MODULE_PARM_DESC(collector, "\n"
+	"\tIf greater than one, tells the SAS Layer to run in Task Collector\n"
+	"\tMode.  If 1 or 0, tells the SAS Layer to run in Direct Mode.\n"
+	"\tThe mvsas SAS LLDD supports both modes.\n"
+	"\tDefault: 1 (Direct Mode).\n");
+
 static struct scsi_transport_template *mvs_stt;
+struct kmem_cache *mvs_task_list_cache;
 static const struct mvs_chip_info mvs_chips[] = {
 	[chip_6320] =	{ 1, 2, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
 	[chip_6440] =	{ 1, 4, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
 	[chip_6485] =	{ 1, 8, 0x800, 33, 32, 10, &mvs_64xx_dispatch, },
 	[chip_9180] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
 	[chip_9480] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
+	[chip_9445] =	{ 1, 4, 0x800, 17, 64, 11, &mvs_94xx_dispatch, },
+	[chip_9485] =	{ 2, 4, 0x800, 17, 64, 11, &mvs_94xx_dispatch, },
 	[chip_1300] =	{ 1, 4, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
 	[chip_1320] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
 };
@@ -107,7 +119,6 @@ static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
 
 static void mvs_free(struct mvs_info *mvi)
 {
-	int i;
 	struct mvs_wq *mwq;
 	int slot_nr;
 
@@ -119,12 +130,8 @@ static void mvs_free(struct mvs_info *mvi)
 	else
 		slot_nr = MVS_SLOTS;
 
-	for (i = 0; i < mvi->tags_num; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-		if (slot->buf)
-			dma_free_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
-					  slot->buf, slot->buf_dma);
-	}
+	if (mvi->dma_pool)
+		pci_pool_destroy(mvi->dma_pool);
 
 	if (mvi->tx)
 		dma_free_coherent(mvi->dev,
@@ -213,6 +220,7 @@ static irqreturn_t mvs_interrupt(int irq, void *opaque)
 static int __devinit mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
 {
 	int i = 0, slot_nr;
+	char pool_name[32];
 
 	if (mvi->flags & MVF_FLAG_SOC)
 		slot_nr = MVS_SOC_SLOTS;
@@ -272,18 +280,14 @@ static int __devinit mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
 	if (!mvi->bulk_buffer)
 		goto err_out;
 #endif
-	for (i = 0; i < slot_nr; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-
-		slot->buf = dma_alloc_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
-					       &slot->buf_dma, GFP_KERNEL);
-		if (!slot->buf) {
-			printk(KERN_DEBUG"failed to allocate slot->buf.\n");
+	sprintf(pool_name, "%s%d", "mvs_dma_pool", mvi->id);
+	mvi->dma_pool = pci_pool_create(pool_name, mvi->pdev, MVS_SLOT_BUF_SZ, 16, 0);
+	if (!mvi->dma_pool) {
+			printk(KERN_DEBUG "failed to create dma pool %s.\n", pool_name);
 			goto err_out;
-		}
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-		++mvi->tags_num;
 	}
+	mvi->tags_num = slot_nr;
+
 	/* Initialize tags */
 	mvs_tag_init(mvi);
 	return 0;
@@ -484,7 +488,7 @@ static void  __devinit mvs_post_sas_ha_init(struct Scsi_Host *shost,
 
 	sha->num_phys = nr_core * chip_info->n_phy;
 
-	sha->lldd_max_execute_num = 1;
+	sha->lldd_max_execute_num = lldd_max_execute_num;
 
 	if (mvi->flags & MVF_FLAG_SOC)
 		can_queue = MVS_SOC_CAN_QUEUE;
@@ -670,6 +674,24 @@ static struct pci_device_id __devinitdata mvs_pci_table[] = {
 	{ PCI_VDEVICE(TTI, 0x2740), chip_9480 },
 	{ PCI_VDEVICE(TTI, 0x2744), chip_9480 },
 	{ PCI_VDEVICE(TTI, 0x2760), chip_9480 },
+	{
+		.vendor		= 0x1b4b,
+		.device		= 0x9445,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x9480,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_9445,
+	},
+	{
+		.vendor		= 0x1b4b,
+		.device		= 0x9485,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x9480,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_9485,
+	},
 
 	{ }	/* terminate list */
 };
@@ -690,6 +712,14 @@ static int __init mvs_init(void)
 	if (!mvs_stt)
 		return -ENOMEM;
 
+	mvs_task_list_cache = kmem_cache_create("mvs_task_list", sizeof(struct mvs_task_list),
+							 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!mvs_task_list_cache) {
+		rc = -ENOMEM;
+		mv_printk("%s: mvs_task_list_cache alloc failed! \n", __func__);
+		goto err_out;
+	}
+
 	rc = pci_register_driver(&mvs_pci_driver);
 
 	if (rc)
@@ -706,6 +736,7 @@ static void __exit mvs_exit(void)
 {
 	pci_unregister_driver(&mvs_pci_driver);
 	sas_release_transport(mvs_stt);
+	kmem_cache_destroy(mvs_task_list_cache);
 }
 
 module_init(mvs_init);
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index adedaa916ecb..0ef27425c447 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
@@ -862,178 +863,286 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 }
 
 #define	DEV_IS_GONE(mvi_dev)	((!mvi_dev || (mvi_dev->dev_type == NO_DEVICE)))
-static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
-				struct completion *completion,int is_tmf,
-				struct mvs_tmf_task *tmf)
+static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf,
+				struct mvs_tmf_task *tmf, int *pass)
 {
 	struct domain_device *dev = task->dev;
-	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
-	struct mvs_info *mvi = mvi_dev->mvi_info;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
 	struct mvs_task_exec_info tei;
-	struct sas_task *t = task;
 	struct mvs_slot_info *slot;
-	u32 tag = 0xdeadbeef, rc, n_elem = 0;
-	u32 n = num, pass = 0;
-	unsigned long flags = 0,  flags_libsas = 0;
+	u32 tag = 0xdeadbeef, n_elem = 0;
+	int rc = 0;
 
 	if (!dev->port) {
-		struct task_status_struct *tsm = &t->task_status;
+		struct task_status_struct *tsm = &task->task_status;
 
 		tsm->resp = SAS_TASK_UNDELIVERED;
 		tsm->stat = SAS_PHY_DOWN;
+		/*
+		 * libsas will use dev->port, should not call task_done for sata
+		 */
 		if (dev->dev_type != SATA_DEV)
-			t->task_done(t);
-		return 0;
+			task->task_done(task);
+		return rc;
 	}
 
-	spin_lock_irqsave(&mvi->lock, flags);
-	do {
-		dev = t->dev;
-		mvi_dev = dev->lldd_dev;
-		if (DEV_IS_GONE(mvi_dev)) {
-			if (mvi_dev)
-				mv_dprintk("device %d not ready.\n",
-					mvi_dev->device_id);
-			else
-				mv_dprintk("device %016llx not ready.\n",
-					SAS_ADDR(dev->sas_addr));
+	if (DEV_IS_GONE(mvi_dev)) {
+		if (mvi_dev)
+			mv_dprintk("device %d not ready.\n",
+				mvi_dev->device_id);
+		else
+			mv_dprintk("device %016llx not ready.\n",
+				SAS_ADDR(dev->sas_addr));
 
 			rc = SAS_PHY_DOWN;
-			goto out_done;
-		}
+			return rc;
+	}
+	tei.port = dev->port->lldd_port;
+	if (tei.port && !tei.port->port_attached && !tmf) {
+		if (sas_protocol_ata(task->task_proto)) {
+			struct task_status_struct *ts = &task->task_status;
+			mv_dprintk("SATA/STP port %d does not attach "
+					"device.\n", dev->port->id);
+			ts->resp = SAS_TASK_COMPLETE;
+			ts->stat = SAS_PHY_DOWN;
 
-		if (dev->port->id >= mvi->chip->n_phy)
-			tei.port = &mvi->port[dev->port->id - mvi->chip->n_phy];
-		else
-			tei.port = &mvi->port[dev->port->id];
-
-		if (tei.port && !tei.port->port_attached) {
-			if (sas_protocol_ata(t->task_proto)) {
-				struct task_status_struct *ts = &t->task_status;
-
-				mv_dprintk("port %d does not"
-					"attached device.\n", dev->port->id);
-				ts->stat = SAS_PROTO_RESPONSE;
-				ts->stat = SAS_PHY_DOWN;
-				spin_unlock_irqrestore(dev->sata_dev.ap->lock,
-						       flags_libsas);
-				spin_unlock_irqrestore(&mvi->lock, flags);
-				t->task_done(t);
-				spin_lock_irqsave(&mvi->lock, flags);
-				spin_lock_irqsave(dev->sata_dev.ap->lock,
-						  flags_libsas);
-				if (n > 1)
-					t = list_entry(t->list.next,
-						       struct sas_task, list);
-				continue;
-			} else {
-				struct task_status_struct *ts = &t->task_status;
-				ts->resp = SAS_TASK_UNDELIVERED;
-				ts->stat = SAS_PHY_DOWN;
-				t->task_done(t);
-				if (n > 1)
-					t = list_entry(t->list.next,
-							struct sas_task, list);
-				continue;
-			}
-		}
+			task->task_done(task);
 
-		if (!sas_protocol_ata(t->task_proto)) {
-			if (t->num_scatter) {
-				n_elem = dma_map_sg(mvi->dev,
-						    t->scatter,
-						    t->num_scatter,
-						    t->data_dir);
-				if (!n_elem) {
-					rc = -ENOMEM;
-					goto err_out;
-				}
-			}
 		} else {
-			n_elem = t->num_scatter;
+			struct task_status_struct *ts = &task->task_status;
+			mv_dprintk("SAS port %d does not attach "
+				"device.\n", dev->port->id);
+			ts->resp = SAS_TASK_UNDELIVERED;
+			ts->stat = SAS_PHY_DOWN;
+			task->task_done(task);
 		}
+		return rc;
+	}
 
-		rc = mvs_tag_alloc(mvi, &tag);
-		if (rc)
-			goto err_out;
+	if (!sas_protocol_ata(task->task_proto)) {
+		if (task->num_scatter) {
+			n_elem = dma_map_sg(mvi->dev,
+					    task->scatter,
+					    task->num_scatter,
+					    task->data_dir);
+			if (!n_elem) {
+				rc = -ENOMEM;
+				goto prep_out;
+			}
+		}
+	} else {
+		n_elem = task->num_scatter;
+	}
 
-		slot = &mvi->slot_info[tag];
+	rc = mvs_tag_alloc(mvi, &tag);
+	if (rc)
+		goto err_out;
 
+	slot = &mvi->slot_info[tag];
 
-		t->lldd_task = NULL;
-		slot->n_elem = n_elem;
-		slot->slot_tag = tag;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+	task->lldd_task = NULL;
+	slot->n_elem = n_elem;
+	slot->slot_tag = tag;
+	slot->buf = pci_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
+	if (!slot->buf) {
+		rc = -ENOMEM;	/* rc is still 0 from mvs_tag_alloc() here */
+		goto err_out_tag;
+	}
+	memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+
+	tei.task = task;
+	tei.hdr = &mvi->slot[tag];
+	tei.tag = tag;
+	tei.n_elem = n_elem;
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		rc = mvs_task_prep_smp(mvi, &tei);
+		break;
+	case SAS_PROTOCOL_SSP:
+		rc = mvs_task_prep_ssp(mvi, &tei, is_tmf, tmf);
+		break;
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+		rc = mvs_task_prep_ata(mvi, &tei);
+		break;
+	default:
+		dev_printk(KERN_ERR, mvi->dev,
+			"unknown sas_task proto: 0x%x\n",
+			task->task_proto);
+		rc = -EINVAL;
+		break;
+	}
 
-		tei.task = t;
-		tei.hdr = &mvi->slot[tag];
-		tei.tag = tag;
-		tei.n_elem = n_elem;
-		switch (t->task_proto) {
-		case SAS_PROTOCOL_SMP:
-			rc = mvs_task_prep_smp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SSP:
-			rc = mvs_task_prep_ssp(mvi, &tei, is_tmf, tmf);
-			break;
-		case SAS_PROTOCOL_SATA:
-		case SAS_PROTOCOL_STP:
-		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-			rc = mvs_task_prep_ata(mvi, &tei);
-			break;
-		default:
-			dev_printk(KERN_ERR, mvi->dev,
-				   "unknown sas_task proto: 0x%x\n",
-				   t->task_proto);
-			rc = -EINVAL;
-			break;
-		}
+	if (rc) {
+		mv_dprintk("rc is %x\n", rc);
+		goto err_out_slot_buf;
+	}
+	slot->task = task;
+	slot->port = tei.port;
+	task->lldd_task = slot;
+	list_add_tail(&slot->entry, &tei.port->list);
+	spin_lock(&task->task_state_lock);
+	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
+	spin_unlock(&task->task_state_lock);
 
-		if (rc) {
-			mv_dprintk("rc is %x\n", rc);
-			goto err_out_tag;
-		}
-		slot->task = t;
-		slot->port = tei.port;
-		t->lldd_task = slot;
-		list_add_tail(&slot->entry, &tei.port->list);
-		/* TODO: select normal or high priority */
-		spin_lock(&t->task_state_lock);
-		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
-		spin_unlock(&t->task_state_lock);
-
-		mvs_hba_memory_dump(mvi, tag, t->task_proto);
-		mvi_dev->running_req++;
-		++pass;
-		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-		if (n > 1)
-			t = list_entry(t->list.next, struct sas_task, list);
-		if (likely(pass))
-			MVS_CHIP_DISP->start_delivery(mvi, (mvi->tx_prod - 1) &
-						      (MVS_CHIP_SLOT_SZ - 1));
+	mvs_hba_memory_dump(mvi, tag, task->task_proto);
+	mvi_dev->running_req++;
+	++(*pass);	/* caller kicks start_delivery when this is non-zero */
+	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
 
-	} while (--n);
-	rc = 0;
-	goto out_done;
+	return rc;
 
+err_out_slot_buf:
+	pci_pool_free(mvi->dma_pool, slot->buf, slot->buf_dma);
 err_out_tag:
 	mvs_tag_free(mvi, tag);
 err_out:
 
-	dev_printk(KERN_ERR, mvi->dev, "mvsas exec failed[%d]!\n", rc);
-	if (!sas_protocol_ata(t->task_proto))
+	dev_printk(KERN_ERR, mvi->dev, "mvsas prep failed[%d]!\n", rc);
+	if (!sas_protocol_ata(task->task_proto))
 		if (n_elem)
-			dma_unmap_sg(mvi->dev, t->scatter, n_elem,
-				     t->data_dir);
-out_done:
+			dma_unmap_sg(mvi->dev, task->scatter, n_elem,
+				     task->data_dir);
+prep_out:
+	return rc;
+}
+
+static struct mvs_task_list *mvs_task_alloc_list(int *num, gfp_t gfp_flags)
+{
+	struct mvs_task_list *first = NULL;	/* NULL if nothing was allocated */
+
+	for (; *num > 0; --*num) {	/* *num counts nodes still to allocate */
+		struct mvs_task_list *mvs_list = kmem_cache_zalloc(mvs_task_list_cache, gfp_flags);
+
+		if (!mvs_list)
+			break;	/* *num stays > 0 so the caller can see the shortfall */
+
+		INIT_LIST_HEAD(&mvs_list->list);
+		if (!first)
+			first = mvs_list;	/* first node anchors the ring */
+		else
+			list_add_tail(&mvs_list->list, &first->list);
+
+	}
+
+	return first;
+}
+
+static inline void mvs_task_free_list(struct mvs_task_list *mvs_list)
+{
+	LIST_HEAD(list);	/* temporary on-stack head used only for the walk */
+	struct list_head *pos, *a;
+	struct mvs_task_list *mlist = NULL;
+
+	__list_add(&list, mvs_list->list.prev, &mvs_list->list);	/* link head in front of mvs_list */
+
+	list_for_each_safe(pos, a, &list) {	/* _safe: pos is freed in the body */
+		list_del_init(pos);
+		mlist = list_entry(pos, struct mvs_task_list, list);
+		kmem_cache_free(mvs_task_list_cache, mlist);
+	}
+}
+
+static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
+				struct completion *completion, int is_tmf,
+				struct mvs_tmf_task *tmf)
+{
+	struct domain_device *dev = task->dev;
+	struct mvs_info *mvi = NULL;
+	int rc = 0;	/* signed: mvs_task_prep() returns int, incl. -ENOMEM/-EINVAL */
+	int pass = 0;	/* incremented by mvs_task_prep() once a slot is queued */
+	unsigned long flags = 0;
+
+	mvi = ((struct mvs_device *)task->dev->lldd_dev)->mvi_info;
+	/* SATA: the port lock is released here and re-taken before returning */
+	if ((dev->dev_type == SATA_DEV) && (dev->sata_dev.ap != NULL))
+		spin_unlock_irq(dev->sata_dev.ap->lock);
+
+	spin_lock_irqsave(&mvi->lock, flags);
+	rc = mvs_task_prep(task, mvi, is_tmf, tmf, &pass);
+	if (rc)
+		dev_printk(KERN_ERR, mvi->dev, "mvsas exec failed[%d]!\n", rc);
+
+	if (likely(pass))
+		MVS_CHIP_DISP->start_delivery(mvi, (mvi->tx_prod - 1) &
+			(MVS_CHIP_SLOT_SZ - 1));
 	spin_unlock_irqrestore(&mvi->lock, flags);
+
+	if ((dev->dev_type == SATA_DEV) && (dev->sata_dev.ap != NULL))
+		spin_lock_irq(dev->sata_dev.ap->lock);
+
+	return rc;
+}
+
+static int mvs_collector_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
+				struct completion *completion, int is_tmf,
+				struct mvs_tmf_task *tmf)
+{
+	struct domain_device *dev = task->dev;
+	struct mvs_prv_info *mpi = dev->port->ha->lldd_ha;
+	struct mvs_info *mvi = NULL;
+	struct sas_task *t = task;
+	struct mvs_task_list *mvs_list = NULL, *a;
+	LIST_HEAD(q);
+	int pass[2] = {0};	/* per-HBA count of slots queued, indexed by mvi->id */
+	int rc = 0;	/* signed: holds -ENOMEM and mvs_task_prep()'s int result */
+	int n = num;	/* int to match mvs_task_alloc_list(int *num, ...) */
+	unsigned long flags = 0;
+
+	mvs_list = mvs_task_alloc_list(&n, gfp_flags);
+	if (n) {	/* non-zero remainder: not every node could be allocated */
+		printk(KERN_ERR "%s: mvs alloc list failed.\n", __func__);
+		rc = -ENOMEM;
+		goto free_list;
+	}
+
+	__list_add(&q, mvs_list->list.prev, &mvs_list->list);
+
+	list_for_each_entry(a, &q, list) {
+		a->task = t;
+		t = list_entry(t->list.next, struct sas_task, list);
+	}
+
+	list_for_each_entry(a, &q, list) {
+		/* prepare each task under the lock of the HBA that owns it */
+		t = a->task;
+		mvi = ((struct mvs_device *)t->dev->lldd_dev)->mvi_info;
+
+		spin_lock_irqsave(&mvi->lock, flags);
+		rc = mvs_task_prep(t, mvi, is_tmf, tmf, &pass[mvi->id]);
+		if (rc)
+			dev_printk(KERN_ERR, mvi->dev, "mvsas exec failed[%d]!\n", rc);
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	}
+
+	if (likely(pass[0]))
+		MVS_CHIP_DISP->start_delivery(mpi->mvi[0],
+			(mpi->mvi[0]->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+
+	if (likely(pass[1]))
+		MVS_CHIP_DISP->start_delivery(mpi->mvi[1],
+			(mpi->mvi[1]->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+
+	list_del_init(&q);	/* unlink the on-stack head before freeing nodes */
+
+free_list:
+	if (mvs_list)
+		mvs_task_free_list(mvs_list);
+
 	return rc;
 }
 
 int mvs_queue_command(struct sas_task *task, const int num,
 			gfp_t gfp_flags)
 {
-	return mvs_task_exec(task, num, gfp_flags, NULL, 0, NULL);
+	struct mvs_device *mvi_dev = task->dev->lldd_dev;
+	struct sas_ha_struct *sas = mvi_dev->mvi_info->sas;
+
+	if (sas->lldd_max_execute_num < 2)	/* 0 or 1: single-task path */
+		return mvs_task_exec(task, num, gfp_flags, NULL, 0, NULL);
+	else	/* >= 2: walk the list of num tasks starting at task */
+		return mvs_collector_task_exec(task, num, gfp_flags, NULL, 0, NULL);
 }
 
 static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
@@ -1067,6 +1176,11 @@ static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
 		/* do nothing */
 		break;
 	}
+
+	if (slot->buf) {
+		pci_pool_free(mvi->dma_pool, slot->buf, slot->buf_dma);
+		slot->buf = NULL;
+	}
 	list_del_init(&slot->entry);
 	task->lldd_task = NULL;
 	slot->task = NULL;
@@ -1255,6 +1369,7 @@ static void mvs_port_notify_formed(struct asd_sas_phy *sas_phy, int lock)
 		spin_lock_irqsave(&mvi->lock, flags);
 	port->port_attached = 1;
 	phy->port = port;
+	sas_port->lldd_port = port;
 	if (phy->phy_type & PORT_TYPE_SAS) {
 		port->wide_port_phymap = sas_port->phy_mask;
 		mv_printk("set wide port phy map %x\n", sas_port->phy_mask);
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 77ddc7c1e5f2..1367d8b9350d 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -3,6 +3,7 @@
  *
  * Copyright 2007 Red Hat, Inc.
  * Copyright 2008 Marvell. <kewei@marvell.com>
+ * Copyright 2009-2011 Marvell. <yuxiangl@marvell.com>
  *
  * This file is licensed under GPLv2.
  *
@@ -67,6 +68,7 @@ extern struct mvs_tgt_initiator mvs_tgt;
 extern struct mvs_info *tgt_mvi;
 extern const struct mvs_dispatch mvs_64xx_dispatch;
 extern const struct mvs_dispatch mvs_94xx_dispatch;
+extern struct kmem_cache *mvs_task_list_cache;
 
 #define DEV_IS_EXPANDER(type)	\
 	((type == EDGE_DEV) || (type == FANOUT_DEV))
@@ -341,6 +343,7 @@ struct mvs_info {
 	dma_addr_t bulk_buffer_dma;
 #define TRASH_BUCKET_SIZE    	0x20000
 #endif
+	void *dma_pool;
 	struct mvs_slot_info slot_info[0];
 };
 
@@ -367,6 +370,11 @@ struct mvs_task_exec_info {
 	int n_elem;
 };
 
+struct mvs_task_list {
+	struct sas_task *task;	/* task assigned to this node by the collector path */
+	struct list_head list;	/* links the nodes built by mvs_task_alloc_list() */
+};
+
 
 /******************** function prototype *********************/
 void mvs_get_sas_addr(void *buf, u32 buflen);
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index 835d8d66e696..4b3b4755945c 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -8147,7 +8147,7 @@ static int ncr53c8xx_abort(struct scsi_cmnd *cmd)
 	unsigned long flags;
 	struct scsi_cmnd *done_list;
 
-	printk("ncr53c8xx_abort: command pid %lu\n", cmd->serial_number);
+	printk("ncr53c8xx_abort\n");
 
 	NCR_LOCK_NCB(np, flags);
 
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 96d5ad0c1e42..7f636b118287 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3814,6 +3814,9 @@ static long pmcraid_ioctl_passthrough(
 			rc = -EFAULT;
 			goto out_free_buffer;
 		}
+	} else if (request_size < 0) {
+		rc = -EINVAL;
+		goto out_free_buffer;
 	}
 
 	/* check if we have any additional command parameters */
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 8ba5744c267e..d838205ab169 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -4066,7 +4066,7 @@ __qla1280_print_scsi_cmd(struct scsi_cmnd *cmd)
 	   } */
 	printk("  tag=%d, transfersize=0x%x \n",
 	       cmd->tag, cmd->transfersize);
-	printk("  Pid=%li, SP=0x%p\n", cmd->serial_number, CMD_SP(cmd));
+	printk("  SP=0x%p\n", CMD_SP(cmd));
 	printk(" underflow size = 0x%x, direction=0x%x\n",
 	       cmd->underflow, cmd->sc_data_direction);
 }
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index d3e58d763b43..532313e0725e 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -496,8 +496,8 @@ do_read:
 			offset = 0;
 		}
 
-		rval = qla2x00_read_sfp(vha, ha->sfp_data_dma, addr, offset,
-		    SFP_BLOCK_SIZE);
+		rval = qla2x00_read_sfp(vha, ha->sfp_data_dma, ha->sfp_data,
+		    addr, offset, SFP_BLOCK_SIZE, 0);
 		if (rval != QLA_SUCCESS) {
 			qla_printk(KERN_WARNING, ha,
 			    "Unable to read SFP data (%x/%x/%x).\n", rval,
@@ -628,12 +628,12 @@ qla2x00_sysfs_write_edc(struct file *filp, struct kobject *kobj,
 
 	memcpy(ha->edc_data, &buf[8], len);
 
-	rval = qla2x00_write_edc(vha, dev, adr, ha->edc_data_dma,
-	    ha->edc_data, len, opt);
+	rval = qla2x00_write_sfp(vha, ha->edc_data_dma, ha->edc_data,
+	    dev, adr, len, opt);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2(qla_printk(KERN_INFO, ha,
 		    "Unable to write EDC (%x) %02x:%02x:%04x:%02x:%02x.\n",
-		    rval, dev, adr, opt, len, *buf));
+		    rval, dev, adr, opt, len, buf[8]));
 		return 0;
 	}
 
@@ -685,8 +685,8 @@ qla2x00_sysfs_write_edc_status(struct file *filp, struct kobject *kobj,
 			return -EINVAL;
 
 	memset(ha->edc_data, 0, len);
-	rval = qla2x00_read_edc(vha, dev, adr, ha->edc_data_dma,
-	    ha->edc_data, len, opt);
+	rval = qla2x00_read_sfp(vha, ha->edc_data_dma, ha->edc_data,
+			dev, adr, len, opt);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2(qla_printk(KERN_INFO, ha,
 		    "Unable to write EDC status (%x) %02x:%02x:%04x:%02x.\n",
@@ -1568,7 +1568,7 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
 
 	/* Now that the rport has been deleted, set the fcport state to
 	   FCS_DEVICE_DEAD */
-	atomic_set(&fcport->state, FCS_DEVICE_DEAD);
+	qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
 
 	/*
 	 * Transport has effectively 'deleted' the rport, clear
@@ -1877,14 +1877,15 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 
 	scsi_remove_host(vha->host);
 
+	/* Allow timer to run to drain queued items, when removing vp */
+	qla24xx_deallocate_vp_id(vha);
+
 	if (vha->timer_active) {
 		qla2x00_vp_stop_timer(vha);
 		DEBUG15(printk(KERN_INFO "scsi(%ld): timer for the vport[%d]"
 		" = %p has stopped\n", vha->host_no, vha->vp_idx, vha));
 	}
 
-	qla24xx_deallocate_vp_id(vha);
-
 	/* No pending activities shall be there on the vha now */
 	DEBUG(msleep(random32()%10));  /* Just to see if something falls on
 					* the net we have placed below */
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 903b0586ded3..8c10e2c4928e 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h
index 074a999c7017..0f0f54e35f06 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.h
+++ b/drivers/scsi/qla2xxx/qla_bsg.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 096141148257..c53719a9a747 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index b74e6b5743dc..930414541ec6 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ee20353c8550..cc5a79259d33 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1717,6 +1717,14 @@ typedef struct fc_port {
 #define FCS_DEVICE_LOST		3
 #define FCS_ONLINE		4
 
+static const char * const port_state_str[] = {
+	"Unknown",
+	"UNCONFIGURED",
+	"DEAD",
+	"LOST",
+	"ONLINE"
+};
+
 /*
  * FC port flags.
  */
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index 6271353e8c51..a5a4e1275bf2 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index f5ba09c8a663..691783abfb69 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -416,8 +416,7 @@ struct cmd_type_6 {
 	uint8_t vp_index;
 
 	uint32_t fcp_data_dseg_address[2];	/* Data segment address. */
-	uint16_t fcp_data_dseg_len;		/* Data segment length. */
-	uint16_t reserved_1;			/* MUST be set to 0. */
+	uint32_t fcp_data_dseg_len;		/* Data segment length. */
 };
 
 #define COMMAND_TYPE_7	0x18		/* Command Type 7 entry */
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index d48326ee3f61..0b381224ae4b 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -39,6 +39,8 @@ extern int qla81xx_load_risc(scsi_qla_host_t *, uint32_t *);
 extern int qla2x00_perform_loop_resync(scsi_qla_host_t *);
 extern int qla2x00_loop_resync(scsi_qla_host_t *);
 
+extern int qla2x00_find_new_loop_id(scsi_qla_host_t *, fc_port_t *);
+
 extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *);
 extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
 
@@ -100,6 +102,8 @@ extern int ql2xgffidenable;
 extern int ql2xenabledif;
 extern int ql2xenablehba_err_chk;
 extern int ql2xtargetreset;
+extern int ql2xdontresethba;
+extern unsigned int ql2xmaxlun;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
@@ -319,15 +323,12 @@ extern int
 qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *);
 
 extern int
-qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t, uint16_t);
-
-extern int
-qla2x00_read_edc(scsi_qla_host_t *, uint16_t, uint16_t, dma_addr_t,
-    uint8_t *, uint16_t, uint16_t);
+qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *,
+	uint16_t, uint16_t, uint16_t, uint16_t);
 
 extern int
-qla2x00_write_edc(scsi_qla_host_t *, uint16_t, uint16_t, dma_addr_t,
-    uint8_t *, uint16_t, uint16_t);
+qla2x00_write_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *,
+	uint16_t, uint16_t, uint16_t, uint16_t);
 
 extern int
 qla2x00_set_idma_speed(scsi_qla_host_t *, uint16_t, uint16_t, uint16_t *);
@@ -549,7 +550,6 @@ extern int qla82xx_wr_32(struct qla_hw_data *, ulong, u32);
 extern int qla82xx_rd_32(struct qla_hw_data *, ulong);
 extern int qla82xx_rdmem(struct qla_hw_data *, u64, void *, int);
 extern int qla82xx_wrmem(struct qla_hw_data *, u64, void *, int);
-extern void qla82xx_rom_unlock(struct qla_hw_data *);
 
 /* ISP 8021 IDC */
 extern void qla82xx_clear_drv_active(struct qla_hw_data *);
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 74a91b6dfc68..8cd9066ad906 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 8575808dbae0..920b76bfbb93 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -35,8 +35,6 @@ static int qla2x00_fabric_dev_login(scsi_qla_host_t *, fc_port_t *,
 
 static int qla2x00_restart_isp(scsi_qla_host_t *);
 
-static int qla2x00_find_new_loop_id(scsi_qla_host_t *, fc_port_t *);
-
 static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *);
 static int qla84xx_init_chip(scsi_qla_host_t *);
 static int qla25xx_init_queues(struct qla_hw_data *);
@@ -385,8 +383,18 @@ qla2x00_async_login_done(struct scsi_qla_host *vha, fc_port_t *fcport,
 
 	switch (data[0]) {
 	case MBS_COMMAND_COMPLETE:
+		/*
+		 * Driver must validate login state - If PRLI not complete,
+		 * force a relogin attempt via implicit LOGO, PLOGI, and PRLI
+		 * requests.
+		 */
+		rval = qla2x00_get_port_database(vha, fcport, 0);
+		if (rval != QLA_SUCCESS) {
+			qla2x00_post_async_logout_work(vha, fcport, NULL);
+			qla2x00_post_async_login_work(vha, fcport, NULL);
+			break;
+		}
 		if (fcport->flags & FCF_FCP2_DEVICE) {
-			fcport->flags |= FCF_ASYNC_SENT;
 			qla2x00_post_async_adisc_work(vha, fcport, data);
 			break;
 		}
@@ -397,7 +405,7 @@ qla2x00_async_login_done(struct scsi_qla_host *vha, fc_port_t *fcport,
 		if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
 			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 		else
-			qla2x00_mark_device_lost(vha, fcport, 1, 1);
+			qla2x00_mark_device_lost(vha, fcport, 1, 0);
 		break;
 	case MBS_PORT_ID_USED:
 		fcport->loop_id = data[1];
@@ -409,7 +417,7 @@ qla2x00_async_login_done(struct scsi_qla_host *vha, fc_port_t *fcport,
 		rval = qla2x00_find_new_loop_id(vha, fcport);
 		if (rval != QLA_SUCCESS) {
 			fcport->flags &= ~FCF_ASYNC_SENT;
-			qla2x00_mark_device_lost(vha, fcport, 1, 1);
+			qla2x00_mark_device_lost(vha, fcport, 1, 0);
 			break;
 		}
 		qla2x00_post_async_login_work(vha, fcport, NULL);
@@ -441,7 +449,7 @@ qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport,
 	if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
 	else
-		qla2x00_mark_device_lost(vha, fcport, 1, 1);
+		qla2x00_mark_device_lost(vha, fcport, 1, 0);
 
 	return;
 }
@@ -2536,7 +2544,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
 	fcport->vp_idx = vha->vp_idx;
 	fcport->port_type = FCT_UNKNOWN;
 	fcport->loop_id = FC_NO_LOOP_ID;
-	atomic_set(&fcport->state, FCS_UNCONFIGURED);
+	qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED);
 	fcport->supported_classes = FC_COS_UNSPECIFIED;
 
 	return fcport;
@@ -2722,7 +2730,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 			    "loop_id=0x%04x\n",
 			    vha->host_no, fcport->loop_id));
 
-			atomic_set(&fcport->state, FCS_DEVICE_LOST);
+			qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 		}
 	}
 
@@ -2934,7 +2942,7 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
 	qla2x00_iidma_fcport(vha, fcport);
 	qla24xx_update_fcport_fcp_prio(vha, fcport);
 	qla2x00_reg_remote_port(vha, fcport);
-	atomic_set(&fcport->state, FCS_ONLINE);
+	qla2x00_set_fcport_state(fcport, FCS_ONLINE);
 }
 
 /*
@@ -3391,7 +3399,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha,
  * Context:
  *	Kernel context.
  */
-static int
+int
 qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev)
 {
 	int	rval;
@@ -5202,7 +5210,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 	}
 
 	/* Reset Initialization control block */
-	memset(icb, 0, sizeof(struct init_cb_81xx));
+	memset(icb, 0, ha->init_cb_size);
 
 	/* Copy 1st segment. */
 	dptr1 = (uint8_t *)icb;
@@ -5427,6 +5435,13 @@ qla82xx_restart_isp(scsi_qla_host_t *vha)
 		ha->isp_abort_cnt = 0;
 		clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
 
+		/* Update the firmware version */
+		qla2x00_get_fw_version(vha, &ha->fw_major_version,
+		    &ha->fw_minor_version, &ha->fw_subminor_version,
+		    &ha->fw_attributes, &ha->fw_memory_size,
+		    ha->mpi_version, &ha->mpi_capabilities,
+		    ha->phy_version);
+
 		if (ha->fce) {
 			ha->flags.fce_enabled = 1;
 			memset(ha->fce, 0,
@@ -5508,26 +5523,26 @@ qla81xx_update_fw_options(scsi_qla_host_t *vha)
  *
  * Return:
  *	non-zero (if found)
- * 	0 (if not found)
+ *	-1 (if not found)
  *
  * Context:
  * 	Kernel context
  */
-uint8_t
+static int
 qla24xx_get_fcp_prio(scsi_qla_host_t *vha, fc_port_t *fcport)
 {
 	int i, entries;
 	uint8_t pid_match, wwn_match;
-	uint8_t priority;
+	int priority;
 	uint32_t pid1, pid2;
 	uint64_t wwn1, wwn2;
 	struct qla_fcp_prio_entry *pri_entry;
 	struct qla_hw_data *ha = vha->hw;
 
 	if (!ha->fcp_prio_cfg || !ha->flags.fcp_prio_enabled)
-		return 0;
+		return -1;
 
-	priority = 0;
+	priority = -1;
 	entries = ha->fcp_prio_cfg->num_entries;
 	pri_entry = &ha->fcp_prio_cfg->entry[0];
 
@@ -5610,7 +5625,7 @@ int
 qla24xx_update_fcport_fcp_prio(scsi_qla_host_t *vha, fc_port_t *fcport)
 {
 	int ret;
-	uint8_t priority;
+	int priority;
 	uint16_t mb[5];
 
 	if (fcport->port_type != FCT_TARGET ||
@@ -5618,6 +5633,9 @@ qla24xx_update_fcport_fcp_prio(scsi_qla_host_t *vha, fc_port_t *fcport)
 		return QLA_FUNCTION_FAILED;
 
 	priority = qla24xx_get_fcp_prio(vha, fcport);
+	if (priority < 0)
+		return QLA_FUNCTION_FAILED;
+
 	ret = qla24xx_set_fcp_prio(vha, fcport->loop_id, priority, mb);
 	if (ret == QLA_SUCCESS)
 		fcport->fcp_prio = priority;
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 48f97a92e33d..4c8167e11f69 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -83,3 +83,22 @@ qla2x00_clean_dsd_pool(struct qla_hw_data *ha, srb_t *sp)
 	}
 	INIT_LIST_HEAD(&((struct crc_context *)sp->ctx)->dsd_list);
 }
+
+static inline void
+qla2x00_set_fcport_state(fc_port_t *fcport, int state)
+{
+	int old_state;
+
+	old_state = atomic_read(&fcport->state);
+	atomic_set(&fcport->state, state);
+
+	/* Don't print state transitions during initial allocation of fcport */
+	if (old_state && old_state != state) {
+		DEBUG(qla_printk(KERN_WARNING, fcport->vha->hw,
+		    "scsi(%ld): FCPort state transitioned from %s to %s - "
+		    "portid=%02x%02x%02x.\n", fcport->vha->host_no,
+		    port_state_str[old_state], port_state_str[state],
+		    fcport->d_id.b.domain, fcport->d_id.b.area,
+		    fcport->d_id.b.al_pa));
+	}
+}
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index d78d5896fc33..7bac3cd109d6 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 712518d05128..9c0f0e3389eb 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -843,7 +843,10 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha,
 		qla_printk(KERN_WARNING, ha,
 		    "Invalid SCSI completion handle %d.\n", index);
 
-		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		if (IS_QLA82XX(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		return;
 	}
 
@@ -861,7 +864,10 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha,
 		qla_printk(KERN_WARNING, ha,
 		    "Invalid ISP SCSI completion handle\n");
 
-		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		if (IS_QLA82XX(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 	}
 }
 
@@ -878,7 +884,10 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
 	if (index >= MAX_OUTSTANDING_COMMANDS) {
 		qla_printk(KERN_WARNING, ha,
 		    "%s: Invalid completion handle (%x).\n", func, index);
-		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		if (IS_QLA82XX(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		goto done;
 	}
 	sp = req->outstanding_cmds[index];
@@ -1564,7 +1573,10 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 		    "scsi(%ld): Invalid status handle (0x%x).\n", vha->host_no,
 		    sts->handle);
 
-		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		if (IS_QLA82XX(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		qla2xxx_wake_dpc(vha);
 		return;
 	}
@@ -1794,12 +1806,13 @@ out:
 	if (logit)
 		DEBUG2(qla_printk(KERN_INFO, ha,
 		    "scsi(%ld:%d:%d) FCP command status: 0x%x-0x%x (0x%x) "
-		    "oxid=0x%x cdb=%02x%02x%02x len=0x%x "
+		    "portid=%02x%02x%02x oxid=0x%x cdb=%02x%02x%02x len=0x%x "
 		    "rsp_info=0x%x resid=0x%x fw_resid=0x%x\n", vha->host_no,
 		    cp->device->id, cp->device->lun, comp_status, scsi_status,
-		    cp->result, ox_id, cp->cmnd[0],
-		    cp->cmnd[1], cp->cmnd[2], scsi_bufflen(cp), rsp_info_len,
-		    resid_len, fw_resid_len));
+		    cp->result, fcport->d_id.b.domain, fcport->d_id.b.area,
+		    fcport->d_id.b.al_pa, ox_id, cp->cmnd[0], cp->cmnd[1],
+		    cp->cmnd[2], scsi_bufflen(cp), rsp_info_len, resid_len,
+		    fw_resid_len));
 
 	if (rsp->status_srb == NULL)
 		qla2x00_sp_compl(ha, sp);
@@ -1908,13 +1921,17 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
 		qla2x00_sp_compl(ha, sp);
 
 	} else if (pkt->entry_type == COMMAND_A64_TYPE || pkt->entry_type ==
-	    COMMAND_TYPE || pkt->entry_type == COMMAND_TYPE_7) {
+		COMMAND_TYPE || pkt->entry_type == COMMAND_TYPE_7
+		|| pkt->entry_type == COMMAND_TYPE_6) {
 		DEBUG2(printk("scsi(%ld): Error entry - invalid handle\n",
-		    vha->host_no));
+			vha->host_no));
 		qla_printk(KERN_WARNING, ha,
-		    "Error entry - invalid handle\n");
+			"Error entry - invalid handle\n");
 
-		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		if (IS_QLA82XX(ha))
+			set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+		else
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		qla2xxx_wake_dpc(vha);
 	}
 }
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 34893397ac84..c26f0acdfecc 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1261,11 +1261,12 @@ qla2x00_get_port_database(scsi_qla_host_t *vha, fc_port_t *fcport, uint8_t opt)
 		/* Check for logged in state. */
 		if (pd24->current_login_state != PDS_PRLI_COMPLETE &&
 		    pd24->last_login_state != PDS_PRLI_COMPLETE) {
-			DEBUG2(printk("%s(%ld): Unable to verify "
-			    "login-state (%x/%x) for loop_id %x\n",
-			    __func__, vha->host_no,
-			    pd24->current_login_state,
-			    pd24->last_login_state, fcport->loop_id));
+			DEBUG2(qla_printk(KERN_WARNING, ha,
+			   "scsi(%ld): Unable to verify login-state (%x/%x) "
+			   " - portid=%02x%02x%02x.\n", vha->host_no,
+			   pd24->current_login_state, pd24->last_login_state,
+			   fcport->d_id.b.domain, fcport->d_id.b.area,
+			   fcport->d_id.b.al_pa));
 			rval = QLA_FUNCTION_FAILED;
 			goto gpd_error_out;
 		}
@@ -1289,6 +1290,12 @@ qla2x00_get_port_database(scsi_qla_host_t *vha, fc_port_t *fcport, uint8_t opt)
 		/* Check for logged in state. */
 		if (pd->master_state != PD_STATE_PORT_LOGGED_IN &&
 		    pd->slave_state != PD_STATE_PORT_LOGGED_IN) {
+			DEBUG2(qla_printk(KERN_WARNING, ha,
+			   "scsi(%ld): Unable to verify login-state (%x/%x) "
+			   " - portid=%02x%02x%02x.\n", vha->host_no,
+			   pd->master_state, pd->slave_state,
+			   fcport->d_id.b.domain, fcport->d_id.b.area,
+			   fcport->d_id.b.al_pa));
 			rval = QLA_FUNCTION_FAILED;
 			goto gpd_error_out;
 		}
@@ -1883,7 +1890,8 @@ qla24xx_fabric_logout(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 	lg->handle = MAKE_HANDLE(req->id, lg->handle);
 	lg->nport_handle = cpu_to_le16(loop_id);
 	lg->control_flags =
-	    __constant_cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO);
+	    __constant_cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO|
+		LCF_FREE_NPORT);
 	lg->port_id[0] = al_pa;
 	lg->port_id[1] = area;
 	lg->port_id[2] = domain;
@@ -2362,7 +2370,7 @@ qla24xx_abort_command(srb_t *sp)
 	abt->entry_count = 1;
 	abt->handle = MAKE_HANDLE(req->id, abt->handle);
 	abt->nport_handle = cpu_to_le16(fcport->loop_id);
-	abt->handle_to_abort = handle;
+	abt->handle_to_abort = MAKE_HANDLE(req->id, handle);
 	abt->port_id[0] = fcport->d_id.b.al_pa;
 	abt->port_id[1] = fcport->d_id.b.area;
 	abt->port_id[2] = fcport->d_id.b.domain;
@@ -2779,44 +2787,6 @@ qla2x00_disable_fce_trace(scsi_qla_host_t *vha, uint64_t *wr, uint64_t *rd)
 }
 
 int
-qla2x00_read_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint16_t addr,
-    uint16_t off, uint16_t count)
-{
-	int rval;
-	mbx_cmd_t mc;
-	mbx_cmd_t *mcp = &mc;
-
-	if (!IS_FWI2_CAPABLE(vha->hw))
-		return QLA_FUNCTION_FAILED;
-
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
-
-	mcp->mb[0] = MBC_READ_SFP;
-	mcp->mb[1] = addr;
-	mcp->mb[2] = MSW(sfp_dma);
-	mcp->mb[3] = LSW(sfp_dma);
-	mcp->mb[6] = MSW(MSD(sfp_dma));
-	mcp->mb[7] = LSW(MSD(sfp_dma));
-	mcp->mb[8] = count;
-	mcp->mb[9] = off;
-	mcp->mb[10] = 0;
-	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_0;
-	mcp->tov = MBX_TOV_SECONDS;
-	mcp->flags = 0;
-	rval = qla2x00_mailbox_command(vha, mcp);
-
-	if (rval != QLA_SUCCESS) {
-		DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__,
-		    vha->host_no, rval, mcp->mb[0]));
-	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
-	}
-
-	return rval;
-}
-
-int
 qla2x00_get_idma_speed(scsi_qla_host_t *vha, uint16_t loop_id,
 	uint16_t *port_speed, uint16_t *mb)
 {
@@ -3581,15 +3551,22 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha)
 }
 
 int
-qla2x00_read_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
-    dma_addr_t sfp_dma, uint8_t *sfp, uint16_t len, uint16_t opt)
+qla2x00_read_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint8_t *sfp,
+	uint16_t dev, uint16_t off, uint16_t len, uint16_t opt)
 {
 	int rval;
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_FWI2_CAPABLE(ha))
+		return QLA_FUNCTION_FAILED;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
+	if (len == 1)
+		opt |= BIT_0;
+
 	mcp->mb[0] = MBC_READ_SFP;
 	mcp->mb[1] = dev;
 	mcp->mb[2] = MSW(sfp_dma);
@@ -3597,17 +3574,16 @@ qla2x00_read_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
 	mcp->mb[6] = MSW(MSD(sfp_dma));
 	mcp->mb[7] = LSW(MSD(sfp_dma));
 	mcp->mb[8] = len;
-	mcp->mb[9] = adr;
+	mcp->mb[9] = off;
 	mcp->mb[10] = opt;
 	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_0;
+	mcp->in_mb = MBX_1|MBX_0;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
 
 	if (opt & BIT_0)
-		if (sfp)
-			*sfp = mcp->mb[8];
+		*sfp = mcp->mb[1];
 
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__,
@@ -3620,18 +3596,24 @@ qla2x00_read_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
 }
 
 int
-qla2x00_write_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
-    dma_addr_t sfp_dma, uint8_t *sfp, uint16_t len, uint16_t opt)
+qla2x00_write_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint8_t *sfp,
+	uint16_t dev, uint16_t off, uint16_t len, uint16_t opt)
 {
 	int rval;
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_FWI2_CAPABLE(ha))
+		return QLA_FUNCTION_FAILED;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
+	if (len == 1)
+		opt |= BIT_0;
+
 	if (opt & BIT_0)
-		if (sfp)
-			len = *sfp;
+		len = *sfp;
 
 	mcp->mb[0] = MBC_WRITE_SFP;
 	mcp->mb[1] = dev;
@@ -3640,10 +3622,10 @@ qla2x00_write_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
 	mcp->mb[6] = MSW(MSD(sfp_dma));
 	mcp->mb[7] = LSW(MSD(sfp_dma));
 	mcp->mb[8] = len;
-	mcp->mb[9] = adr;
+	mcp->mb[9] = off;
 	mcp->mb[10] = opt;
 	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_0;
+	mcp->in_mb = MBX_1|MBX_0;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -4160,63 +4142,32 @@ int
 qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp, uint16_t *frac)
 {
 	int rval;
-	mbx_cmd_t mc;
-	mbx_cmd_t *mcp = &mc;
+	uint8_t byte;
 	struct qla_hw_data *ha = vha->hw;
 
-	DEBUG11(printk(KERN_INFO "%s(%ld): entered.\n", __func__, ha->host_no));
+	DEBUG11(printk(KERN_INFO "%s(%ld): entered.\n", __func__, vha->host_no));
 
-	/* High bits. */
-	mcp->mb[0] = MBC_READ_SFP;
-	mcp->mb[1] = 0x98;
-	mcp->mb[2] = 0;
-	mcp->mb[3] = 0;
-	mcp->mb[6] = 0;
-	mcp->mb[7] = 0;
-	mcp->mb[8] = 1;
-	mcp->mb[9] = 0x01;
-	mcp->mb[10] = BIT_13|BIT_0;
-	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_1|MBX_0;
-	mcp->tov = MBX_TOV_SECONDS;
-	mcp->flags = 0;
-	rval = qla2x00_mailbox_command(vha, mcp);
+	/* Integer part */
+	rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x01, 1, BIT_13|BIT_0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk(KERN_WARNING
-		    "%s(%ld): failed=%x (%x).\n", __func__,
-		    vha->host_no, rval, mcp->mb[0]));
+		    "%s(%ld): failed=%x.\n", __func__, vha->host_no, rval));
 		ha->flags.thermal_supported = 0;
 		goto fail;
 	}
-	*temp = mcp->mb[1] & 0xFF;
+	*temp = byte;
 
-	/* Low bits. */
-	mcp->mb[0] = MBC_READ_SFP;
-	mcp->mb[1] = 0x98;
-	mcp->mb[2] = 0;
-	mcp->mb[3] = 0;
-	mcp->mb[6] = 0;
-	mcp->mb[7] = 0;
-	mcp->mb[8] = 1;
-	mcp->mb[9] = 0x10;
-	mcp->mb[10] = BIT_13|BIT_0;
-	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
-	mcp->in_mb = MBX_1|MBX_0;
-	mcp->tov = MBX_TOV_SECONDS;
-	mcp->flags = 0;
-	rval = qla2x00_mailbox_command(vha, mcp);
+	/* Fraction part */
+	rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x10, 1, BIT_13|BIT_0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk(KERN_WARNING
-		    "%s(%ld): failed=%x (%x).\n", __func__,
-		    vha->host_no, rval, mcp->mb[0]));
+		    "%s(%ld): failed=%x.\n", __func__, vha->host_no, rval));
 		ha->flags.thermal_supported = 0;
 		goto fail;
 	}
-	*frac = ((mcp->mb[1] & 0xFF) >> 6) * 25;
+	*frac = (byte >> 6) * 25;
 
-	if (rval == QLA_SUCCESS)
-		DEBUG11(printk(KERN_INFO
-		    "%s(%ld): done.\n", __func__, ha->host_no));
+	DEBUG11(printk(KERN_INFO "%s(%ld): done.\n", __func__, vha->host_no));
 fail:
 	return rval;
 }
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 2b69392a71a1..5e343919acad 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -136,7 +136,7 @@ qla2x00_mark_vp_devices_dead(scsi_qla_host_t *vha)
 		    vha->host_no, fcport->loop_id, fcport->vp_idx));
 
 		qla2x00_mark_device_lost(vha, fcport, 0, 0);
-		atomic_set(&fcport->state, FCS_UNCONFIGURED);
+		qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED);
 	}
 }
 
@@ -456,7 +456,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport)
 	else
 		host->max_cmd_len = MAX_CMDSZ;
 	host->max_channel = MAX_BUSES - 1;
-	host->max_lun = MAX_LUNS;
+	host->max_lun = ql2xmaxlun;
 	host->unique_id = host->host_no;
 	host->max_id = MAX_TARGETS_2200;
 	host->transportt = qla2xxx_transport_vport_template;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 455fe134d31d..e1138bcc834c 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -844,6 +844,12 @@ qla82xx_rom_lock(struct qla_hw_data *ha)
 	return 0;
 }
 
+static void
+qla82xx_rom_unlock(struct qla_hw_data *ha)
+{
+	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+}
+
 static int
 qla82xx_wait_rom_busy(struct qla_hw_data *ha)
 {
@@ -924,7 +930,7 @@ qla82xx_rom_fast_read(struct qla_hw_data *ha, int addr, int *valp)
 		return -1;
 	}
 	ret = qla82xx_do_rom_fast_read(ha, addr, valp);
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 	return ret;
 }
 
@@ -1056,7 +1062,7 @@ qla82xx_write_flash_dword(struct qla_hw_data *ha, uint32_t flashaddr,
 	ret = qla82xx_flash_wait_write_finish(ha);
 
 done_write:
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 	return ret;
 }
 
@@ -1081,12 +1087,26 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha)
 	/* Halt all the indiviual PEGs and other blocks of the ISP */
 	qla82xx_rom_lock(ha);
 
-	/* mask all niu interrupts */
+	/* disable all I2Q */
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x10, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x14, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x18, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x1c, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x20, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_I2Q + 0x24, 0x0);
+
+	/* disable all niu interrupts */
 	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0x40, 0xff);
 	/* disable xge rx/tx */
 	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0x70000, 0x00);
 	/* disable xg1 rx/tx */
 	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0x80000, 0x00);
+	/* disable sideband mac */
+	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0x90000, 0x00);
+	/* disable ap0 mac */
+	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0xa0000, 0x00);
+	/* disable ap1 mac */
+	qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0xb0000, 0x00);
 
 	/* halt sre */
 	val = qla82xx_rd_32(ha, QLA82XX_CRB_SRE + 0x1000);
@@ -1101,6 +1121,7 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha)
 	qla82xx_wr_32(ha, QLA82XX_CRB_TIMER + 0x10, 0x0);
 	qla82xx_wr_32(ha, QLA82XX_CRB_TIMER + 0x18, 0x0);
 	qla82xx_wr_32(ha, QLA82XX_CRB_TIMER + 0x100, 0x0);
+	qla82xx_wr_32(ha, QLA82XX_CRB_TIMER + 0x200, 0x0);
 
 	/* halt pegs */
 	qla82xx_wr_32(ha, QLA82XX_CRB_PEG_NET_0 + 0x3c, 1);
@@ -1108,9 +1129,9 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha)
 	qla82xx_wr_32(ha, QLA82XX_CRB_PEG_NET_2 + 0x3c, 1);
 	qla82xx_wr_32(ha, QLA82XX_CRB_PEG_NET_3 + 0x3c, 1);
 	qla82xx_wr_32(ha, QLA82XX_CRB_PEG_NET_4 + 0x3c, 1);
+	msleep(20);
 
 	/* big hammer */
-	msleep(1000);
 	if (test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags))
 		/* don't reset CAM block on reset */
 		qla82xx_wr_32(ha, QLA82XX_ROMUSB_GLB_SW_RESET, 0xfeffffff);
@@ -1129,7 +1150,7 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha)
 	qla82xx_wr_32(ha, QLA82XX_CRB_QDR_NET + 0xe4, val);
 	msleep(20);
 
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 
 	/* Read the signature value from the flash.
 	 * Offset 0: Contain signature (0xcafecafe)
@@ -2395,9 +2416,13 @@ qla82xx_load_fw(scsi_qla_host_t *vha)
 
 	if (qla82xx_fw_load_from_flash(ha) == QLA_SUCCESS) {
 		qla_printk(KERN_ERR, ha,
-			"Firmware loaded successfully from flash\n");
+		    "Firmware loaded successfully from flash\n");
 		return QLA_SUCCESS;
+	} else {
+		qla_printk(KERN_ERR, ha,
+		    "Firmware load from flash failed\n");
 	}
+
 try_blob_fw:
 	qla_printk(KERN_INFO, ha,
 	    "Attempting to load firmware from blob\n");
@@ -2548,11 +2573,11 @@ qla2xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
 			dsd_seg = (uint32_t *)&cmd_pkt->fcp_data_dseg_address;
 			*dsd_seg++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
 			*dsd_seg++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
-			cmd_pkt->fcp_data_dseg_len = dsd_list_len;
+			*dsd_seg++ = cpu_to_le32(dsd_list_len);
 		} else {
 			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
 			*cur_dsd++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
-			*cur_dsd++ = dsd_list_len;
+			*cur_dsd++ = cpu_to_le32(dsd_list_len);
 		}
 		cur_dsd = (uint32_t *)next_dsd;
 		while (avail_dsds) {
@@ -2991,7 +3016,7 @@ qla82xx_unprotect_flash(struct qla_hw_data *ha)
 		qla_printk(KERN_WARNING, ha, "Write disable failed\n");
 
 done_unprotect:
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 	return ret;
 }
 
@@ -3020,7 +3045,7 @@ qla82xx_protect_flash(struct qla_hw_data *ha)
 	if (qla82xx_write_disable_flash(ha) != 0)
 		qla_printk(KERN_WARNING, ha, "Write disable failed\n");
 done_protect:
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 	return ret;
 }
 
@@ -3048,7 +3073,7 @@ qla82xx_erase_sector(struct qla_hw_data *ha, int addr)
 	}
 	ret = qla82xx_flash_wait_write_finish(ha);
 done:
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 	return ret;
 }
 
@@ -3228,7 +3253,7 @@ void qla82xx_rom_lock_recovery(struct qla_hw_data *ha)
 	 * else died while holding it.
 	 * In either case, unlock.
 	 */
-	qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_UNLOCK));
+	qla82xx_rom_unlock(ha);
 }
 
 /*
@@ -3528,15 +3553,18 @@ int
 qla82xx_device_state_handler(scsi_qla_host_t *vha)
 {
 	uint32_t dev_state;
+	uint32_t old_dev_state;
 	int rval = QLA_SUCCESS;
 	unsigned long dev_init_timeout;
 	struct qla_hw_data *ha = vha->hw;
+	int loopcount = 0;
 
 	qla82xx_idc_lock(ha);
 	if (!vha->flags.init_done)
 		qla82xx_set_drv_active(vha);
 
 	dev_state = qla82xx_rd_32(ha, QLA82XX_CRB_DEV_STATE);
+	old_dev_state = dev_state;
 	qla_printk(KERN_INFO, ha, "1:Device state is 0x%x = %s\n", dev_state,
 		dev_state < MAX_STATES ? qdev_state[dev_state] : "Unknown");
 
@@ -3553,10 +3581,16 @@ qla82xx_device_state_handler(scsi_qla_host_t *vha)
 			break;
 		}
 		dev_state = qla82xx_rd_32(ha, QLA82XX_CRB_DEV_STATE);
-		qla_printk(KERN_INFO, ha,
-			"2:Device state is 0x%x = %s\n", dev_state,
-			dev_state < MAX_STATES ?
-			qdev_state[dev_state] : "Unknown");
+		if (old_dev_state != dev_state) {
+			loopcount = 0;
+			old_dev_state = dev_state;
+		}
+		if (loopcount < 5) {
+			qla_printk(KERN_INFO, ha,
+			    "2:Device state is 0x%x = %s\n", dev_state,
+			    dev_state < MAX_STATES ?
+			    qdev_state[dev_state] : "Unknown");
+		}
 
 		switch (dev_state) {
 		case QLA82XX_DEV_READY:
@@ -3570,6 +3604,7 @@ qla82xx_device_state_handler(scsi_qla_host_t *vha)
 			qla82xx_idc_lock(ha);
 			break;
 		case QLA82XX_DEV_NEED_RESET:
+		    if (!ql2xdontresethba)
 			qla82xx_need_reset_handler(vha);
 			dev_init_timeout = jiffies +
 				(ha->nx_dev_init_timeout * HZ);
@@ -3604,6 +3639,7 @@ qla82xx_device_state_handler(scsi_qla_host_t *vha)
 			msleep(1000);
 			qla82xx_idc_lock(ha);
 		}
+		loopcount++;
 	}
 exit:
 	qla82xx_idc_unlock(ha);
@@ -3621,7 +3657,8 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
 		if (dev_state == QLA82XX_DEV_NEED_RESET &&
 		    !test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags)) {
 			qla_printk(KERN_WARNING, ha,
-			    "%s(): Adapter reset needed!\n", __func__);
+			    "scsi(%ld) %s: Adapter reset needed!\n",
+				vha->host_no, __func__);
 			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		} else if (dev_state == QLA82XX_DEV_NEED_QUIESCENT &&
@@ -3632,10 +3669,27 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
 			set_bit(ISP_QUIESCE_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		} else {
-			qla82xx_check_fw_alive(vha);
 			if (qla82xx_check_fw_alive(vha)) {
 				halt_status = qla82xx_rd_32(ha,
 				    QLA82XX_PEG_HALT_STATUS1);
+				qla_printk(KERN_INFO, ha,
+				    "scsi(%ld): %s, Dumping hw/fw registers:\n "
+				    " PEG_HALT_STATUS1: 0x%x, PEG_HALT_STATUS2: 0x%x,\n "
+				    " PEG_NET_0_PC: 0x%x, PEG_NET_1_PC: 0x%x,\n "
+				    " PEG_NET_2_PC: 0x%x, PEG_NET_3_PC: 0x%x,\n "
+				    " PEG_NET_4_PC: 0x%x\n",
+				    vha->host_no, __func__, halt_status,
+				    qla82xx_rd_32(ha, QLA82XX_PEG_HALT_STATUS2),
+				    qla82xx_rd_32(ha,
+					    QLA82XX_CRB_PEG_NET_0 + 0x3c),
+				    qla82xx_rd_32(ha,
+					    QLA82XX_CRB_PEG_NET_1 + 0x3c),
+				    qla82xx_rd_32(ha,
+					    QLA82XX_CRB_PEG_NET_2 + 0x3c),
+				    qla82xx_rd_32(ha,
+					    QLA82XX_CRB_PEG_NET_3 + 0x3c),
+				    qla82xx_rd_32(ha,
+					    QLA82XX_CRB_PEG_NET_4 + 0x3c));
 				if (halt_status & HALT_STATUS_UNRECOVERABLE) {
 					set_bit(ISP_UNRECOVERABLE,
 					    &vha->dpc_flags);
@@ -3651,8 +3705,9 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
 				if (ha->flags.mbox_busy) {
 					ha->flags.mbox_int = 1;
 					DEBUG2(qla_printk(KERN_ERR, ha,
-					    "Due to fw hung, doing premature "
-					    "completion of mbx command\n"));
+					    "scsi(%ld) Due to fw hung, doing "
+					    "premature completion of mbx "
+					    "command\n", vha->host_no));
 					if (test_bit(MBX_INTR_WAIT,
 					    &ha->mbx_cmd_flags))
 						complete(&ha->mbx_intr_comp);
diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index ed5883f1778a..8a21832c6693 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index aa7747529165..f461925a9dfc 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -164,6 +164,20 @@ module_param(ql2xasynctmfenable, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xasynctmfenable,
 		"Enables issue of TM IOCBs asynchronously via IOCB mechanism"
 		"Default is 0 - Issue TM IOCBs via mailbox mechanism.");
+
+int ql2xdontresethba;
+module_param(ql2xdontresethba, int, S_IRUGO);
+MODULE_PARM_DESC(ql2xdontresethba,
+	"Option to specify reset behaviour\n"
+	" 0 (Default) -- Reset on failure.\n"
+	" 1 -- Do not reset on failure.\n");
+
+uint ql2xmaxlun = MAX_LUNS;
+module_param(ql2xmaxlun, uint, S_IRUGO);
+MODULE_PARM_DESC(ql2xmaxlun,
+		"Defines the maximum LU number to register with the SCSI "
+		"midlayer. Default is 65535.");
+
 /*
  * SCSI host template entry points
  */
@@ -528,7 +542,7 @@ qla2x00_get_new_sp(scsi_qla_host_t *vha, fc_port_t *fcport,
 static int
 qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 {
-	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+	scsi_qla_host_t *vha = shost_priv(host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmd->device));
 	struct qla_hw_data *ha = vha->hw;
@@ -2128,7 +2142,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	else
 		host->max_cmd_len = MAX_CMDSZ;
 	host->max_channel = MAX_BUSES - 1;
-	host->max_lun = MAX_LUNS;
+	host->max_lun = ql2xmaxlun;
 	host->transportt = qla2xxx_transport_template;
 	sht->vendor_id = (SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_QLOGIC);
 
@@ -2360,21 +2374,26 @@ qla2x00_remove_one(struct pci_dev *pdev)
 	base_vha = pci_get_drvdata(pdev);
 	ha = base_vha->hw;
 
-	spin_lock_irqsave(&ha->vport_slock, flags);
-	list_for_each_entry(vha, &ha->vp_list, list) {
-		atomic_inc(&vha->vref_count);
+	mutex_lock(&ha->vport_lock);
+	while (ha->cur_vport_count) {
+		struct Scsi_Host *scsi_host;
 
-		if (vha->fc_vport) {
-			spin_unlock_irqrestore(&ha->vport_slock, flags);
+		spin_lock_irqsave(&ha->vport_slock, flags);
 
-			fc_vport_terminate(vha->fc_vport);
+		BUG_ON(base_vha->list.next == &ha->vp_list);
+		/* This assumes first entry in ha->vp_list is always base vha */
+		vha = list_first_entry(&base_vha->list, scsi_qla_host_t, list);
+		scsi_host = scsi_host_get(vha->host);
 
-			spin_lock_irqsave(&ha->vport_slock, flags);
-		}
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+		mutex_unlock(&ha->vport_lock);
+
+		fc_vport_terminate(vha->fc_vport);
+		scsi_host_put(vha->host);
 
-		atomic_dec(&vha->vref_count);
+		mutex_lock(&ha->vport_lock);
 	}
-	spin_unlock_irqrestore(&ha->vport_slock, flags);
+	mutex_unlock(&ha->vport_lock);
 
 	set_bit(UNLOADING, &base_vha->dpc_flags);
 
@@ -2544,7 +2563,7 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport,
 {
 	if (atomic_read(&fcport->state) == FCS_ONLINE &&
 	    vha->vp_idx == fcport->vp_idx) {
-		atomic_set(&fcport->state, FCS_DEVICE_LOST);
+		qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 		qla2x00_schedule_rport_del(vha, fcport, defer);
 	}
 	/*
@@ -2552,7 +2571,7 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport,
 	 * port but do the retries.
 	 */
 	if (atomic_read(&fcport->state) != FCS_DEVICE_DEAD)
-		atomic_set(&fcport->state, FCS_DEVICE_LOST);
+		qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 
 	if (!do_login)
 		return;
@@ -2607,7 +2626,7 @@ qla2x00_mark_all_devices_lost(scsi_qla_host_t *vha, int defer)
 		if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD)
 			continue;
 		if (atomic_read(&fcport->state) == FCS_ONLINE) {
-			atomic_set(&fcport->state, FCS_DEVICE_LOST);
+			qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST);
 			if (defer)
 				qla2x00_schedule_rport_del(vha, fcport, defer);
 			else if (vha->vp_idx == fcport->vp_idx)
@@ -3214,6 +3233,17 @@ void qla2x00_relogin(struct scsi_qla_host *vha)
 							fcport->d_id.b.area,
 							fcport->d_id.b.al_pa);
 
+				if (fcport->loop_id == FC_NO_LOOP_ID) {
+					fcport->loop_id = next_loopid =
+					    ha->min_external_loopid;
+					status = qla2x00_find_new_loop_id(
+					    vha, fcport);
+					if (status != QLA_SUCCESS) {
+						/* Ran out of IDs to use */
+						break;
+					}
+				}
+
 				if (IS_ALOGIO_CAPABLE(ha)) {
 					fcport->flags |= FCF_ASYNC_SENT;
 					data[0] = 0;
@@ -3604,7 +3634,8 @@ qla2x00_timer(scsi_qla_host_t *vha)
 	if (!pci_channel_offline(ha->pdev))
 		pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
 
-	if (IS_QLA82XX(ha)) {
+	/* Make sure qla82xx_watchdog is run only for physical port */
+	if (!vha->vp_idx && IS_QLA82XX(ha)) {
 		if (test_bit(ISP_QUIESCE_NEEDED, &vha->dpc_flags))
 			start_dpc++;
 		qla82xx_watchdog(vha);
@@ -3612,7 +3643,8 @@ qla2x00_timer(scsi_qla_host_t *vha)
 
 	/* Loop down handler. */
 	if (atomic_read(&vha->loop_down_timer) > 0 &&
-	    !(test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags))
+	    !(test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) &&
+	    !(test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags))
 		&& vha->flags.online) {
 
 		if (atomic_read(&vha->loop_down_timer) ==
@@ -3648,7 +3680,11 @@ qla2x00_timer(scsi_qla_host_t *vha)
 					if (!(sfcp->flags & FCF_FCP2_DEVICE))
 						continue;
 
-					set_bit(ISP_ABORT_NEEDED,
+					if (IS_QLA82XX(ha))
+						set_bit(FCOE_CTX_RESET_NEEDED,
+							&vha->dpc_flags);
+					else
+						set_bit(ISP_ABORT_NEEDED,
 							&vha->dpc_flags);
 					break;
 				}
@@ -3667,7 +3703,12 @@ qla2x00_timer(scsi_qla_host_t *vha)
 				qla_printk(KERN_WARNING, ha,
 				    "Loop down - aborting ISP.\n");
 
-				set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+				if (IS_QLA82XX(ha))
+					set_bit(FCOE_CTX_RESET_NEEDED,
+						&vha->dpc_flags);
+				else
+					set_bit(ISP_ABORT_NEEDED,
+						&vha->dpc_flags);
 			}
 		}
 		DEBUG3(printk("scsi(%ld): Loop Down - seconds remaining %d\n",
@@ -3675,8 +3716,8 @@ qla2x00_timer(scsi_qla_host_t *vha)
 		    atomic_read(&vha->loop_down_timer)));
 	}
 
-	/* Check if beacon LED needs to be blinked */
-	if (ha->beacon_blink_led == 1) {
+	/* Check if beacon LED needs to be blinked for physical host only */
+	if (!vha->vp_idx && (ha->beacon_blink_led == 1)) {
 		set_bit(BEACON_BLINK_NEEDED, &vha->dpc_flags);
 		start_dpc++;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_settings.h b/drivers/scsi/qla2xxx/qla_settings.h
index f0b2b9986a55..d70f03008981 100644
--- a/drivers/scsi/qla2xxx/qla_settings.h
+++ b/drivers/scsi/qla2xxx/qla_settings.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 22070621206c..693647661ed1 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 3a260c3f055a..062c97bf62f5 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -1,15 +1,15 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2010 QLogic Corporation
+ * Copyright (c)  2003-2011 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.07.00"
+#define QLA2XXX_VERSION      "8.03.07.03-k"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
 #define QLA_DRIVER_PATCH_VER	7
-#define QLA_DRIVER_BETA_VER	0
+#define QLA_DRIVER_BETA_VER	3
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 230ba097d28c..c22f2a764d9d 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -2068,15 +2068,14 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd)
 	struct scsi_qla_host *ha = to_qla_host(cmd->device->host);
 	unsigned int id = cmd->device->id;
 	unsigned int lun = cmd->device->lun;
-	unsigned long serial = cmd->serial_number;
 	unsigned long flags;
 	struct srb *srb = NULL;
 	int ret = SUCCESS;
 	int wait = 0;
 
 	ql4_printk(KERN_INFO, ha,
-	    "scsi%ld:%d:%d: Abort command issued cmd=%p, pid=%ld\n",
-	    ha->host_no, id, lun, cmd, serial);
+	    "scsi%ld:%d:%d: Abort command issued cmd=%p\n",
+	    ha->host_no, id, lun, cmd);
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	srb = (struct srb *) CMD_SP(cmd);
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index e2d45c91b8e8..9689d41c7888 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = {
 	.use_clustering		= ENABLE_CLUSTERING,
 };
 
+static const struct of_device_id qpti_match[];
 static int __devinit qpti_sbus_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct scsi_host_template *tpnt;
 	struct device_node *dp = op->dev.of_node;
 	struct Scsi_Host *host;
@@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op)
 	static int nqptis;
 	const char *fcode;
 
-	if (!op->dev.of_match)
+	match = of_match_device(qpti_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	tpnt = op->dev.of_match->data;
+	tpnt = match->data;
 
 	/* Sometimes Antares cards come up not completely
 	 * setup, and we get a report of a zero IRQ.
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 633c2395a92a..abea2cf05c2e 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -321,6 +321,12 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
 				    "changed. The Linux SCSI layer does not "
 				    "automatically adjust these parameters.\n");
 
+		if (sshdr.asc == 0x38 && sshdr.ascq == 0x07)
+			scmd_printk(KERN_WARNING, scmd,
+				    "Warning! Received an indication that the "
+				    "LUN reached a thin provisioning soft "
+				    "threshold.\n");
+
 		/*
 		 * Pass the UA upwards for a determination in the completion
 		 * functions.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6d5c7ff43f5b..ec1803a48723 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -74,8 +74,6 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY	3
 
-static void scsi_run_queue(struct request_queue *q);
-
 /*
  * Function:	scsi_unprep_request()
  *
@@ -161,7 +159,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	blk_requeue_request(q, cmd->request);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	scsi_run_queue(q);
+	kblockd_schedule_work(q, &device->requeue_work);
 
 	return 0;
 }
@@ -400,10 +398,15 @@ static inline int scsi_host_is_busy(struct Scsi_Host *shost)
 static void scsi_run_queue(struct request_queue *q)
 {
 	struct scsi_device *sdev = q->queuedata;
-	struct Scsi_Host *shost = sdev->host;
+	struct Scsi_Host *shost;
 	LIST_HEAD(starved_list);
 	unsigned long flags;
 
+	/* if the device is dead, sdev will be NULL, so no queue to run */
+	if (!sdev)
+		return;
+
+	shost = sdev->host;
 	if (scsi_target(sdev)->single_lun)
 		scsi_single_lun_run(sdev);
 
@@ -411,8 +414,6 @@ static void scsi_run_queue(struct request_queue *q)
 	list_splice_init(&shost->starved_list, &starved_list);
 
 	while (!list_empty(&starved_list)) {
-		int flagset;
-
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -436,18 +437,9 @@ static void scsi_run_queue(struct request_queue *q)
 		}
 
 		spin_unlock(shost->host_lock);
-
 		spin_lock(sdev->request_queue->queue_lock);
-		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-				!test_bit(QUEUE_FLAG_REENTER,
-					&sdev->request_queue->queue_flags);
-		if (flagset)
-			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-		__blk_run_queue(sdev->request_queue, false);
-		if (flagset)
-			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
+		__blk_run_queue(sdev->request_queue);
 		spin_unlock(sdev->request_queue->queue_lock);
-
 		spin_lock(shost->host_lock);
 	}
 	/* put any unprocessed entries back */
@@ -457,6 +449,16 @@ static void scsi_run_queue(struct request_queue *q)
 	blk_run_queue(q);
 }
 
+/* Work handler: run the request queue of the device embedding @work. */
+void scsi_requeue_run_queue(struct work_struct *work)
+{
+	struct scsi_device *sdev =
+		container_of(work, struct scsi_device, requeue_work);
+	struct request_queue *q = sdev->request_queue;
+
+	scsi_run_queue(q);
+}
+
 /*
  * Function:	scsi_requeue_command()
  *
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index c99da926fdac..f46855cd853d 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -386,13 +386,59 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
  * @s: output goes here
  * @p: not used
  */
-static int proc_scsi_show(struct seq_file *s, void *p)
+static int always_match(struct device *dev, void *data)
 {
-	seq_printf(s, "Attached devices:\n");
-	bus_for_each_dev(&scsi_bus_type, NULL, s, proc_print_scsidevice);
-	return 0;
+	return 1;
+}
+
+static inline struct device *next_scsi_device(struct device *start)
+{
+	struct device *next = bus_find_device(&scsi_bus_type, start, NULL,
+					      always_match);
+	put_device(start);	/* done with @start (NULL on the first call) */
+	return next;
 }
 
+static void *scsi_seq_start(struct seq_file *sfile, loff_t *pos)
+{
+	struct device *dev = NULL;
+	loff_t n = *pos;
+
+	/* ->private counts devices passed; recount on every (re)start */
+	sfile->private = NULL;
+	while ((dev = next_scsi_device(dev)) && n--)
+		sfile->private++;
+
+	return dev;
+}
+
+static void *scsi_seq_next(struct seq_file *sfile, void *v, loff_t *pos)
+{
+	++sfile->private;	/* one more device has been passed */
+	++*pos;
+	return next_scsi_device(v);
+}
+
+static void scsi_seq_stop(struct seq_file *sfile, void *v)
+{
+	put_device(v);	/* release the device left by start/next, if any */
+}
+
+static int scsi_seq_show(struct seq_file *sfile, void *dev)
+{
+	/* no device passed yet => first record, so emit the header */
+	if (sfile->private == NULL)
+		seq_puts(sfile, "Attached devices:\n");
+	return proc_print_scsidevice(dev, sfile);
+}
+
+static const struct seq_operations scsi_seq_ops = {
+	.start	= scsi_seq_start,	/* walk to the *pos'th device */
+	.next	= scsi_seq_next,	/* step to the following device */
+	.stop	= scsi_seq_stop,	/* put_device() the current one */
+	.show	= scsi_seq_show		/* header once, then one device */
+};
+
 /**
  * proc_scsi_open - glue function
  * @inode: not used
@@ -406,7 +452,7 @@ static int proc_scsi_open(struct inode *inode, struct file *file)
 	 * We don't really need this for the write case but it doesn't
 	 * harm either.
 	 */
-	return single_open(file, proc_scsi_show, NULL);
+	return seq_open(file, &scsi_seq_ops);
 }
 
 static const struct file_operations proc_scsi_operations = {
@@ -415,7 +461,7 @@ static const struct file_operations proc_scsi_operations = {
 	.read		= seq_read,
 	.write		= proc_scsi_write,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= seq_release,
 };
 
 /**
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087821fac8fe..58584dc0724a 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -242,6 +242,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	int display_failure_msg = 1, ret;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 	extern void scsi_evt_thread(struct work_struct *work);
+	extern void scsi_requeue_run_queue(struct work_struct *work);
 
 	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
 		       GFP_ATOMIC);
@@ -264,6 +265,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	INIT_LIST_HEAD(&sdev->event_list);
 	spin_lock_init(&sdev->list_lock);
 	INIT_WORK(&sdev->event_work, scsi_evt_thread);
+	INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue);
 
 	sdev->sdev_gendev.parent = get_device(&starget->dev);
 	sdev->sdev_target = starget;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64233fd..e63912510fb9 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -322,14 +322,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 		kfree(evt);
 	}
 
-	if (sdev->request_queue) {
-		sdev->request_queue->queuedata = NULL;
-		/* user context needed to free queue */
-		scsi_free_queue(sdev->request_queue);
-		/* temporary expedient, try to catch use of queue lock
-		 * after free of sdev */
-		sdev->request_queue = NULL;
-	}
+	/* NULL queue means the device can't be used */
+	sdev->request_queue = NULL;
 
 	scsi_target_reap(scsi_target(sdev));
 
@@ -937,6 +931,12 @@ void __scsi_remove_device(struct scsi_device *sdev)
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
+
+	/* cause the request function to reject all I/O requests */
+	sdev->request_queue->queuedata = NULL;
+
+	/* Freeing the queue signals to block that we're done */
+	scsi_free_queue(sdev->request_queue);
 	put_device(dev);
 }
 
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 8bca8c25ba69..84a1fdf67864 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -275,10 +275,8 @@ void scsi_tgt_free_queue(struct Scsi_Host *shost)
 
 	for (i = 0; i < ARRAY_SIZE(qdata->cmd_hash); i++) {
 		list_for_each_entry_safe(tcmd, n, &qdata->cmd_hash[i],
-					 hash_list) {
-			list_del(&tcmd->hash_list);
-			list_add(&tcmd->hash_list, &cmds);
-		}
+					 hash_list)
+			list_move(&tcmd->hash_list, &cmds);
 	}
 
 	spin_unlock_irqrestore(&qdata->cmd_hash_lock, flags);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index fdf3fa639056..1b214910b714 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -422,8 +422,7 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
 
 	snprintf(fc_host->work_q_name, sizeof(fc_host->work_q_name),
 		 "fc_wq_%d", shost->host_no);
-	fc_host->work_q = create_singlethread_workqueue(
-					fc_host->work_q_name);
+	fc_host->work_q = alloc_workqueue(fc_host->work_q_name, 0, 0);
 	if (!fc_host->work_q)
 		return -ENOMEM;
 
@@ -431,8 +430,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
 	snprintf(fc_host->devloss_work_q_name,
 		 sizeof(fc_host->devloss_work_q_name),
 		 "fc_dl_%d", shost->host_no);
-	fc_host->devloss_work_q = create_singlethread_workqueue(
-					fc_host->devloss_work_q_name);
+	fc_host->devloss_work_q =
+			alloc_workqueue(fc_host->devloss_work_q_name, 0, 0);
 	if (!fc_host->devloss_work_q) {
 		destroy_workqueue(fc_host->work_q);
 		fc_host->work_q = NULL;
@@ -2489,6 +2488,8 @@ fc_rport_final_delete(struct work_struct *work)
 	unsigned long flags;
 	int do_callback = 0;
 
+	fc_terminate_rport_io(rport);
+
 	/*
 	 * if a scan is pending, flush the SCSI Host work_q so that
 	 * that we can reclaim the rport scan work element.
@@ -2496,8 +2497,6 @@ fc_rport_final_delete(struct work_struct *work)
 	if (rport->flags & FC_RPORT_SCAN_PENDING)
 		scsi_flush_work(shost);
 
-	fc_terminate_rport_io(rport);
-
 	/*
 	 * Cancel any outstanding timers. These should really exist
 	 * only when rmmod'ing the LLDD and we're asking for
@@ -3816,28 +3815,17 @@ fail_host_msg:
 static void
 fc_bsg_goose_queue(struct fc_rport *rport)
 {
-	int flagset;
-	unsigned long flags;
-
 	if (!rport->rqst_q)
 		return;
 
+	/*
+	 * This get/put dance makes no sense
+	 */
 	get_device(&rport->dev);
-
-	spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
-	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
-		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
-	if (flagset)
-		queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-	__blk_run_queue(rport->rqst_q, false);
-	if (flagset)
-		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
-	spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
-
+	blk_run_queue_async(rport->rqst_q);
 	put_device(&rport->dev);
 }
 
-
 /**
  * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
  * @q:		rport request queue
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index a124a28f2ccb..a1baccce05f0 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -565,12 +565,12 @@ dc390_StartSCSI( struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_sr
 	pDCB->TagMask |= 1 << tag[1];
 	pSRB->TagNumber = tag[1];
 	DC390_write8(ScsiFifo, tag[1]);
-	DEBUG1(printk(KERN_INFO "DC390: Select w/DisCn for Cmd %li (SRB %p), block tag %02x\n", scmd->serial_number, pSRB, tag[1]));
+	DEBUG1(printk(KERN_INFO "DC390: Select w/DisCn for SRB %p, block tag %02x\n", pSRB, tag[1]));
 	cmd = SEL_W_ATN3;
     } else {
 	/* No TagQ */
 //no_tag:
-	DEBUG1(printk(KERN_INFO "DC390: Select w%s/DisCn for Cmd %li (SRB %p), No TagQ\n", disc_allowed ? "" : "o", scmd->serial_number, pSRB));
+	DEBUG1(printk(KERN_INFO "DC390: Select w%s/DisCn for SRB %p, No TagQ\n", disc_allowed ? "" : "o", pSRB));
     }
 
     pSRB->SRBState = SRB_START_;
@@ -620,8 +620,8 @@ dc390_StartSCSI( struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_sr
     if (DC390_read8 (Scsi_Status) & INTERRUPT)
     {
 	dc390_freetag (pDCB, pSRB);
-	DEBUG0(printk ("DC390: Interrupt during Start SCSI (pid %li, target %02i-%02i)\n",
-		scmd->serial_number, scmd->device->id, scmd->device->lun));
+	DEBUG0(printk ("DC390: Interrupt during Start SCSI (target %02i-%02i)\n",
+		scmd->device->id, scmd->device->lun));
 	pSRB->SRBState = SRB_READY;
 	//DC390_write8 (ScsiCmd, CLEAR_FIFO_CMD);
 	pACB->SelLost++;
@@ -1705,8 +1705,7 @@ dc390_SRBdone( struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_srb*
 
     status = pSRB->TargetStatus;
 
-    DEBUG0(printk (" SRBdone (%02x,%08x), SRB %p, pid %li\n", status, pcmd->result,\
-		pSRB, pcmd->serial_number));
+    DEBUG0(printk (" SRBdone (%02x,%08x), SRB %p\n", status, pcmd->result, pSRB));
     if(pSRB->SRBFlag & AUTO_REQSENSE)
     {	/* Last command was a Request Sense */
 	pSRB->SRBFlag &= ~AUTO_REQSENSE;
@@ -1727,7 +1726,7 @@ dc390_SRBdone( struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_srb*
 	    } else {
 		SET_RES_DRV(pcmd->result, DRIVER_SENSE);
 		//pSRB->ScsiCmdLen	 = (u8) (pSRB->Segment1[0] >> 8);
-		DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->serial_number, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+		DEBUG0 (printk ("DC390: RETRY (%02x), target %02i-%02i\n", pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
 		pSRB->TotalXferredLen = 0;
 		SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
 	    }
@@ -1747,7 +1746,7 @@ dc390_SRBdone( struct dc390_acb* pACB, struct dc390_dcb* pDCB, struct dc390_srb*
 	else if (status == SAM_STAT_TASK_SET_FULL)
 	{
 	    scsi_track_queue_full(pcmd->device, pDCB->GoingSRBCnt - 1);
-	    DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->serial_number, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+	    DEBUG0 (printk ("DC390: RETRY (%02x), target %02i-%02i\n", pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
 	    pSRB->TotalXferredLen = 0;
 	    SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
 	}
@@ -1801,7 +1800,7 @@ cmd_done:
     /* Add to free list */
     dc390_Free_insert (pACB, pSRB);
 
-    DEBUG0(printk (KERN_DEBUG "DC390: SRBdone: done pid %li\n", pcmd->serial_number));
+    DEBUG0(printk (KERN_DEBUG "DC390: SRBdone: done\n"));
     pcmd->scsi_done (pcmd);
 
     return;
@@ -1997,8 +1996,7 @@ static int DC390_abort(struct scsi_cmnd *cmd)
 	struct dc390_acb *pACB = (struct dc390_acb*) cmd->device->host->hostdata;
 	struct dc390_dcb *pDCB = (struct dc390_dcb*) cmd->device->hostdata;
 
-	scmd_printk(KERN_WARNING, cmd,
-		"DC390: Abort command (pid %li)\n", cmd->serial_number);
+	scmd_printk(KERN_WARNING, cmd, "DC390: Abort command\n");
 
 	/* abort() is too stupid for already sent commands at the moment. 
 	 * If it's called we are in trouble anyway, so let's dump some info 
@@ -2006,7 +2004,7 @@ static int DC390_abort(struct scsi_cmnd *cmd)
 	dc390_dumpinfo(pACB, pDCB, NULL);
 
 	pDCB->DCBFlag |= ABORT_DEV_;
-	printk(KERN_INFO "DC390: Aborted pid %li\n", cmd->serial_number);
+	printk(KERN_INFO "DC390: Aborted.\n");
 
 	return FAILED;
 }
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index edfc5da8be4c..90e104d6b558 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -1256,8 +1256,8 @@ static int u14_34f_queuecommand_lck(struct scsi_cmnd *SCpnt, void (*done)(struct
    j = ((struct hostdata *) SCpnt->device->host->hostdata)->board_number;
 
    if (SCpnt->host_scribble)
-      panic("%s: qcomm, pid %ld, SCpnt %p already active.\n",
-            BN(j), SCpnt->serial_number, SCpnt);
+      panic("%s: qcomm, SCpnt %p already active.\n",
+            BN(j), SCpnt);
 
    /* i is the mailbox number, look for the first free mailbox
       starting from last_cp_used */
@@ -1286,9 +1286,9 @@ static int u14_34f_queuecommand_lck(struct scsi_cmnd *SCpnt, void (*done)(struct
    cpp->cpp_index = i;
    SCpnt->host_scribble = (unsigned char *) &cpp->cpp_index;
 
-   if (do_trace) printk("%s: qcomm, mbox %d, target %d.%d:%d, pid %ld.\n",
+   if (do_trace) printk("%s: qcomm, mbox %d, target %d.%d:%d.\n",
                         BN(j), i, SCpnt->device->channel, SCpnt->device->id,
-                        SCpnt->device->lun, SCpnt->serial_number);
+                        SCpnt->device->lun);
 
    cpp->opcode = OP_SCSI;
    cpp->channel = SCpnt->device->channel;
@@ -1315,7 +1315,7 @@ static int u14_34f_queuecommand_lck(struct scsi_cmnd *SCpnt, void (*done)(struct
       unmap_dma(i, j);
       SCpnt->host_scribble = NULL;
       scmd_printk(KERN_INFO, SCpnt,
-      		"qcomm, pid %ld, adapter busy.\n", SCpnt->serial_number);
+      		"qcomm, adapter busy.\n");
       return 1;
       }
 
@@ -1337,14 +1337,12 @@ static int u14_34f_eh_abort(struct scsi_cmnd *SCarg) {
    j = ((struct hostdata *) SCarg->device->host->hostdata)->board_number;
 
    if (SCarg->host_scribble == NULL) {
-      scmd_printk(KERN_INFO, SCarg, "abort, pid %ld inactive.\n",
-             SCarg->serial_number);
+      scmd_printk(KERN_INFO, SCarg, "abort, command inactive.\n");
       return SUCCESS;
       }
 
    i = *(unsigned int *)SCarg->host_scribble;
-   scmd_printk(KERN_INFO, SCarg, "abort, mbox %d, pid %ld.\n",
-	       i, SCarg->serial_number);
+   scmd_printk(KERN_INFO, SCarg, "abort, mbox %d.\n", i);
 
    if (i >= sh[j]->can_queue)
       panic("%s: abort, invalid SCarg->host_scribble.\n", BN(j));
@@ -1387,8 +1385,7 @@ static int u14_34f_eh_abort(struct scsi_cmnd *SCarg) {
       SCarg->result = DID_ABORT << 16;
       SCarg->host_scribble = NULL;
       HD(j)->cp_stat[i] = FREE;
-      printk("%s, abort, mbox %d ready, DID_ABORT, pid %ld done.\n",
-             BN(j), i, SCarg->serial_number);
+      printk("%s, abort, mbox %d ready, DID_ABORT, done.\n", BN(j), i);
       SCarg->scsi_done(SCarg);
       return SUCCESS;
       }
@@ -1403,12 +1400,12 @@ static int u14_34f_eh_host_reset(struct scsi_cmnd *SCarg) {
    struct scsi_cmnd *SCpnt;
 
    j = ((struct hostdata *) SCarg->device->host->hostdata)->board_number;
-   scmd_printk(KERN_INFO, SCarg, "reset, enter, pid %ld.\n", SCarg->serial_number);
+   scmd_printk(KERN_INFO, SCarg, "reset, enter.\n");
 
    spin_lock_irq(sh[j]->host_lock);
 
    if (SCarg->host_scribble == NULL)
-      printk("%s: reset, pid %ld inactive.\n", BN(j), SCarg->serial_number);
+      printk("%s: reset, inactive.\n", BN(j));
 
    if (HD(j)->in_reset) {
       printk("%s: reset, exit, already in reset.\n", BN(j));
@@ -1445,14 +1442,12 @@ static int u14_34f_eh_host_reset(struct scsi_cmnd *SCarg) {
 
       if (HD(j)->cp_stat[i] == READY || HD(j)->cp_stat[i] == ABORTING) {
          HD(j)->cp_stat[i] = ABORTING;
-         printk("%s: reset, mbox %d aborting, pid %ld.\n",
-                BN(j), i, SCpnt->serial_number);
+         printk("%s: reset, mbox %d aborting.\n", BN(j), i);
          }
 
       else {
          HD(j)->cp_stat[i] = IN_RESET;
-         printk("%s: reset, mbox %d in reset, pid %ld.\n",
-                BN(j), i, SCpnt->serial_number);
+         printk("%s: reset, mbox %d in reset.\n", BN(j), i);
          }
 
       if (SCpnt->host_scribble == NULL)
@@ -1500,8 +1495,7 @@ static int u14_34f_eh_host_reset(struct scsi_cmnd *SCarg) {
          /* This mailbox is still waiting for its interrupt */
          HD(j)->cp_stat[i] = LOCKED;
 
-         printk("%s, reset, mbox %d locked, DID_RESET, pid %ld done.\n",
-                BN(j), i, SCpnt->serial_number);
+         printk("%s, reset, mbox %d locked, DID_RESET, done.\n", BN(j), i);
          }
 
       else if (HD(j)->cp_stat[i] == ABORTING) {
@@ -1513,8 +1507,7 @@ static int u14_34f_eh_host_reset(struct scsi_cmnd *SCarg) {
          /* This mailbox was never queued to the adapter */
          HD(j)->cp_stat[i] = FREE;
 
-         printk("%s, reset, mbox %d aborting, DID_RESET, pid %ld done.\n",
-                BN(j), i, SCpnt->serial_number);
+         printk("%s, reset, mbox %d aborting, DID_RESET, done.\n", BN(j), i);
          }
 
       else
@@ -1528,7 +1521,7 @@ static int u14_34f_eh_host_reset(struct scsi_cmnd *SCarg) {
    HD(j)->in_reset = FALSE;
    do_trace = FALSE;
 
-   if (arg_done) printk("%s: reset, exit, pid %ld done.\n", BN(j), SCarg->serial_number);
+   if (arg_done) printk("%s: reset, exit, done.\n", BN(j));
    else          printk("%s: reset, exit.\n", BN(j));
 
    spin_unlock_irq(sh[j]->host_lock);
@@ -1671,10 +1664,10 @@ static int reorder(unsigned int j, unsigned long cursec,
    if (link_statistics && (overlap || !(flushcount % link_statistics)))
       for (n = 0; n < n_ready; n++) {
          k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %u"\
+         printk("%s %d.%d:%d mb %d fc %d nr %d sec %ld ns %u"\
                 " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
                 (ihdlr ? "ihdlr" : "qcomm"), SCpnt->channel, SCpnt->target,
-                SCpnt->lun, SCpnt->serial_number, k, flushcount, n_ready,
+                SCpnt->lun, k, flushcount, n_ready,
                 blk_rq_pos(SCpnt->request), blk_rq_sectors(SCpnt->request),
 		cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
                 YESNO(overlap), cpp->xdir);
@@ -1709,9 +1702,9 @@ static void flush_dev(struct scsi_device *dev, unsigned long cursec, unsigned in
 
       if (wait_on_busy(sh[j]->io_port, MAXLOOP)) {
          scmd_printk(KERN_INFO, SCpnt,
-	 	"%s, pid %ld, mbox %d, adapter"
+	 	"%s, mbox %d, adapter"
                 " busy, will abort.\n", (ihdlr ? "ihdlr" : "qcomm"),
-                SCpnt->serial_number, k);
+                k);
          HD(j)->cp_stat[k] = ABORTING;
          continue;
          }
@@ -1793,12 +1786,12 @@ static irqreturn_t ihdlr(unsigned int j)
    if (SCpnt == NULL) panic("%s: ihdlr, mbox %d, SCpnt == NULL.\n", BN(j), i);
 
    if (SCpnt->host_scribble == NULL)
-      panic("%s: ihdlr, mbox %d, pid %ld, SCpnt %p garbled.\n", BN(j), i,
-            SCpnt->serial_number, SCpnt);
+      panic("%s: ihdlr, mbox %d, SCpnt %p garbled.\n", BN(j), i,
+            SCpnt);
 
    if (*(unsigned int *)SCpnt->host_scribble != i)
-      panic("%s: ihdlr, mbox %d, pid %ld, index mismatch %d.\n",
-            BN(j), i, SCpnt->serial_number, *(unsigned int *)SCpnt->host_scribble);
+      panic("%s: ihdlr, mbox %d, index mismatch %d.\n",
+            BN(j), i, *(unsigned int *)SCpnt->host_scribble);
 
    sync_dma(i, j);
 
@@ -1841,8 +1834,8 @@ static irqreturn_t ihdlr(unsigned int j)
              (!(tstatus == CHECK_CONDITION && HD(j)->iocount <= 1000 &&
                (SCpnt->sense_buffer[2] & 0xf) == NOT_READY)))
             scmd_printk(KERN_INFO, SCpnt,
-	    	"ihdlr, pid %ld, target_status 0x%x, sense key 0x%x.\n",
-                   SCpnt->serial_number, spp->target_status,
+	    	"ihdlr, target_status 0x%x, sense key 0x%x.\n",
+                   spp->target_status,
                    SCpnt->sense_buffer[2]);
 
          HD(j)->target_to[scmd_id(SCpnt)][scmd_channel(SCpnt)] = 0;
@@ -1913,8 +1906,8 @@ static irqreturn_t ihdlr(unsigned int j)
         do_trace || msg_byte(spp->target_status))
 #endif
       scmd_printk(KERN_INFO, SCpnt, "ihdlr, mbox %2d, err 0x%x:%x,"\
-             " pid %ld, reg 0x%x, count %d.\n",
-             i, spp->adapter_status, spp->target_status, SCpnt->serial_number,
+             " reg 0x%x, count %d.\n",
+             i, spp->adapter_status, spp->target_status,
              reg, HD(j)->iocount);
 
    unmap_dma(i, j);
diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c
index 4468ae3610f7..97ae716134d0 100644
--- a/drivers/scsi/wd33c93.c
+++ b/drivers/scsi/wd33c93.c
@@ -381,7 +381,7 @@ wd33c93_queuecommand_lck(struct scsi_cmnd *cmd,
 	hostdata = (struct WD33C93_hostdata *) cmd->device->host->hostdata;
 
 	DB(DB_QUEUE_COMMAND,
-	   printk("Q-%d-%02x-%ld( ", cmd->device->id, cmd->cmnd[0], cmd->serial_number))
+	   printk("Q-%d-%02x( ", cmd->device->id, cmd->cmnd[0]))
 
 /* Set up a few fields in the scsi_cmnd structure for our own use:
  *  - host_scribble is the pointer to the next cmd in the input queue
@@ -462,7 +462,7 @@ wd33c93_queuecommand_lck(struct scsi_cmnd *cmd,
 
 	wd33c93_execute(cmd->device->host);
 
-	DB(DB_QUEUE_COMMAND, printk(")Q-%ld ", cmd->serial_number))
+	DB(DB_QUEUE_COMMAND, printk(")Q "))
 
 	spin_unlock_irq(&hostdata->lock);
 	return 0;
@@ -687,7 +687,7 @@ wd33c93_execute(struct Scsi_Host *instance)
 	 */
 
 	DB(DB_EXECUTE,
-	   printk("%s%ld)EX-2 ", (cmd->SCp.phase) ? "d:" : "", cmd->serial_number))
+	   printk("%s)EX-2 ", (cmd->SCp.phase) ? "d:" : ""))
 }
 
 static void
@@ -963,7 +963,7 @@ wd33c93_intr(struct Scsi_Host *instance)
 	case CSR_XFER_DONE | PHS_COMMAND:
 	case CSR_UNEXP | PHS_COMMAND:
 	case CSR_SRV_REQ | PHS_COMMAND:
-		DB(DB_INTR, printk("CMND-%02x,%ld", cmd->cmnd[0], cmd->serial_number))
+		DB(DB_INTR, printk("CMND-%02x", cmd->cmnd[0]))
 		    transfer_pio(regs, cmd->cmnd, cmd->cmd_len, DATA_OUT_DIR,
 				 hostdata);
 		hostdata->state = S_CONNECTED;
@@ -1007,7 +1007,7 @@ wd33c93_intr(struct Scsi_Host *instance)
 		switch (msg) {
 
 		case COMMAND_COMPLETE:
-			DB(DB_INTR, printk("CCMP-%ld", cmd->serial_number))
+			DB(DB_INTR, printk("CCMP"))
 			    write_wd33c93_cmd(regs, WD_CMD_NEGATE_ACK);
 			hostdata->state = S_PRE_CMP_DISC;
 			break;
@@ -1174,7 +1174,7 @@ wd33c93_intr(struct Scsi_Host *instance)
 
 		write_wd33c93(regs, WD_SOURCE_ID, SRCID_ER);
 		if (phs == 0x60) {
-			DB(DB_INTR, printk("SX-DONE-%ld", cmd->serial_number))
+			DB(DB_INTR, printk("SX-DONE"))
 			    cmd->SCp.Message = COMMAND_COMPLETE;
 			lun = read_wd33c93(regs, WD_TARGET_LUN);
 			DB(DB_INTR, printk(":%d.%d", cmd->SCp.Status, lun))
@@ -1200,8 +1200,8 @@ wd33c93_intr(struct Scsi_Host *instance)
 			wd33c93_execute(instance);
 		} else {
 			printk
-			    ("%02x:%02x:%02x-%ld: Unknown SEL_XFER_DONE phase!!---",
-			     asr, sr, phs, cmd->serial_number);
+			    ("%02x:%02x:%02x: Unknown SEL_XFER_DONE phase!!---",
+			     asr, sr, phs);
 			spin_unlock_irqrestore(&hostdata->lock, flags);
 		}
 		break;
@@ -1266,7 +1266,7 @@ wd33c93_intr(struct Scsi_Host *instance)
 			spin_unlock_irqrestore(&hostdata->lock, flags);
 			return;
 		}
-		DB(DB_INTR, printk("UNEXP_DISC-%ld", cmd->serial_number))
+		DB(DB_INTR, printk("UNEXP_DISC"))
 		    hostdata->connected = NULL;
 		hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
 		hostdata->state = S_UNCONNECTED;
@@ -1292,7 +1292,7 @@ wd33c93_intr(struct Scsi_Host *instance)
  */
 
 		write_wd33c93(regs, WD_SOURCE_ID, SRCID_ER);
-		DB(DB_INTR, printk("DISC-%ld", cmd->serial_number))
+		DB(DB_INTR, printk("DISC"))
 		    if (cmd == NULL) {
 			printk(" - Already disconnected! ");
 			hostdata->state = S_UNCONNECTED;
@@ -1491,7 +1491,6 @@ wd33c93_intr(struct Scsi_Host *instance)
 		} else
 			hostdata->state = S_CONNECTED;
 
-		DB(DB_INTR, printk("-%ld", cmd->serial_number))
 		    spin_unlock_irqrestore(&hostdata->lock, flags);
 		break;
 
@@ -1637,8 +1636,8 @@ wd33c93_abort(struct scsi_cmnd * cmd)
 			cmd->host_scribble = NULL;
 			cmd->result = DID_ABORT << 16;
 			printk
-			    ("scsi%d: Abort - removing command %ld from input_Q. ",
-			     instance->host_no, cmd->serial_number);
+			    ("scsi%d: Abort - removing command from input_Q. ",
+			     instance->host_no);
 			enable_irq(cmd->device->host->irq);
 			cmd->scsi_done(cmd);
 			return SUCCESS;
@@ -1662,8 +1661,8 @@ wd33c93_abort(struct scsi_cmnd * cmd)
 		uchar sr, asr;
 		unsigned long timeout;
 
-		printk("scsi%d: Aborting connected command %ld - ",
-		       instance->host_no, cmd->serial_number);
+		printk("scsi%d: Aborting connected command - ",
+		       instance->host_no);
 
 		printk("stopping DMA - ");
 		if (hostdata->dma == D_DMA_RUNNING) {
@@ -1729,8 +1728,8 @@ wd33c93_abort(struct scsi_cmnd * cmd)
 	while (tmp) {
 		if (tmp == cmd) {
 			printk
-			    ("scsi%d: Abort - command %ld found on disconnected_Q - ",
-			     instance->host_no, cmd->serial_number);
+			    ("scsi%d: Abort - command found on disconnected_Q - ",
+			     instance->host_no);
 			printk("Abort SNOOZE. ");
 			enable_irq(cmd->device->host->irq);
 			return FAILED;
@@ -2180,8 +2179,8 @@ wd33c93_proc_info(struct Scsi_Host *instance, char *buf, char **start, off_t off
 		strcat(bp, "\nconnected:     ");
 		if (hd->connected) {
 			cmd = (struct scsi_cmnd *) hd->connected;
-			sprintf(tbuf, " %ld-%d:%d(%02x)",
-				cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)",
+				cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 		}
 	}
@@ -2189,8 +2188,8 @@ wd33c93_proc_info(struct Scsi_Host *instance, char *buf, char **start, off_t off
 		strcat(bp, "\ninput_Q:       ");
 		cmd = (struct scsi_cmnd *) hd->input_Q;
 		while (cmd) {
-			sprintf(tbuf, " %ld-%d:%d(%02x)",
-				cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)",
+				cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 			cmd = (struct scsi_cmnd *) cmd->host_scribble;
 		}
@@ -2199,8 +2198,8 @@ wd33c93_proc_info(struct Scsi_Host *instance, char *buf, char **start, off_t off
 		strcat(bp, "\ndisconnected_Q:");
 		cmd = (struct scsi_cmnd *) hd->disconnected_Q;
 		while (cmd) {
-			sprintf(tbuf, " %ld-%d:%d(%02x)",
-				cmd->serial_number, cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
+			sprintf(tbuf, " %d:%d(%02x)",
+				cmd->device->id, cmd->device->lun, cmd->cmnd[0]);
 			strcat(bp, tbuf);
 			cmd = (struct scsi_cmnd *) cmd->host_scribble;
 		}
diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c
index 5825370bad25..08de58e7f59f 100644
--- a/drivers/spi/amba-pl022.c
+++ b/drivers/spi/amba-pl022.c
@@ -1555,7 +1555,7 @@ static int stop_queue(struct pl022 *pl022)
 	 * A wait_queue on the pl022->busy could be used, but then the common
 	 * execution path (pump_messages) would be required to call wake_up or
 	 * friends on every SPI message. Do this instead */
-	while (!list_empty(&pl022->queue) && pl022->busy && limit--) {
+	while ((!list_empty(&pl022->queue) || pl022->busy) && limit--) {
 		spin_unlock_irqrestore(&pl022->queue_lock, flags);
 		msleep(10);
 		spin_lock_irqsave(&pl022->queue_lock, flags);
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
index b1a4b9f503ae..871e337c917f 100644
--- a/drivers/spi/dw_spi.c
+++ b/drivers/spi/dw_spi.c
@@ -821,7 +821,7 @@ static int stop_queue(struct dw_spi *dws)
 
 	spin_lock_irqsave(&dws->lock, flags);
 	dws->run = QUEUE_STOPPED;
-	while (!list_empty(&dws->queue) && dws->busy && limit--) {
+	while ((!list_empty(&dws->queue) || dws->busy) && limit--) {
 		spin_unlock_irqrestore(&dws->lock, flags);
 		msleep(10);
 		spin_lock_irqsave(&dws->lock, flags);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 9c74aad6be93..dc25bee8d33f 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1493,7 +1493,7 @@ static int stop_queue(struct driver_data *drv_data)
 	 * execution path (pump_messages) would be required to call wake_up or
 	 * friends on every SPI message. Do this instead */
 	drv_data->run = QUEUE_STOPPED;
-	while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) {
+	while ((!list_empty(&drv_data->queue) || drv_data->busy) && limit--) {
 		spin_unlock_irqrestore(&drv_data->lock, flags);
 		msleep(10);
 		spin_lock_irqsave(&drv_data->lock, flags);
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index bdb7289a1d22..f706dba165cf 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -1284,7 +1284,7 @@ static inline int bfin_spi_stop_queue(struct bfin_spi_master_data *drv_data)
 	 * friends on every SPI message. Do this instead
 	 */
 	drv_data->running = false;
-	while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) {
+	while ((!list_empty(&drv_data->queue) || drv_data->busy) && limit--) {
 		spin_unlock_irqrestore(&drv_data->lock, flags);
 		msleep(10);
 		spin_lock_irqsave(&drv_data->lock, flags);
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 6f34963b3c64..7ad48585c5e6 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -662,7 +662,6 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 static int ssb_pci_sprom_get(struct ssb_bus *bus,
 			     struct ssb_sprom *sprom)
 {
-	const struct ssb_sprom *fallback;
 	int err;
 	u16 *buf;
 
@@ -707,10 +706,17 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus,
 		if (err) {
 			/* All CRC attempts failed.
 			 * Maybe there is no SPROM on the device?
-			 * If we have a fallback, use that. */
-			fallback = ssb_get_fallback_sprom();
-			if (fallback) {
-				memcpy(sprom, fallback, sizeof(*sprom));
+			 * Ask the arch code whether an SPROM for this device
+			 * is available in some other storage. */
+			err = ssb_fill_sprom_with_fallback(bus, sprom);
+			if (err) {
+				ssb_printk(KERN_WARNING PFX "WARNING: Using"
+					   " fallback SPROM failed (err %d)\n",
+					   err);
+			} else {
+				ssb_dprintk(KERN_DEBUG PFX "Using SPROM"
+					    " revision %d provided by"
+					    " platform.\n", sprom->revision);
 				err = 0;
 				goto out_free;
 			}
diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c
index 5f34d7a3e3a5..45ff0e3a3828 100644
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 
 
-static const struct ssb_sprom *fallback_sprom;
+static int(*get_fallback_sprom)(struct ssb_bus *dev, struct ssb_sprom *out);
 
 
 static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len,
@@ -145,36 +145,43 @@ out:
 }
 
 /**
- * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found.
+ * ssb_arch_register_fallback_sprom - Registers a method providing a
+ * fallback SPROM if no SPROM is found.
  *
- * @sprom: The SPROM data structure to register.
+ * @sprom_callback: The callback function.
  *
- * With this function the architecture implementation may register a fallback
- * SPROM data structure. The fallback is only used for PCI based SSB devices,
- * where no valid SPROM can be found in the shadow registers.
+ * With this function the architecture implementation may register a
+ * callback handler which fills the SPROM data structure. The fallback is
+ * only used for PCI based SSB devices, where no valid SPROM can be found
+ * in the shadow registers.
  *
- * This function is useful for weird architectures that have a half-assed SSB device
- * hardwired to their PCI bus.
+ * This function is useful for weird architectures that have a half-assed
+ * SSB device hardwired to their PCI bus.
  *
- * Note that it does only work with PCI attached SSB devices. PCMCIA devices currently
- * don't use this fallback.
- * Architectures must provide the SPROM for native SSB devices anyway,
- * so the fallback also isn't used for native devices.
+ * Note that this only works with PCI-attached SSB devices. PCMCIA
+ * devices currently don't use this fallback.
+ * Architectures must provide the SPROM for native SSB devices anyway, so
+ * the fallback also isn't used for native devices.
  *
- * This function is available for architecture code, only. So it is not exported.
+ * This function is available to architecture code only, so it is not
+ * exported.
  */
-int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom)
+int ssb_arch_register_fallback_sprom(int (*sprom_callback)(struct ssb_bus *bus,
+				     struct ssb_sprom *out))
 {
-	if (fallback_sprom)
+	if (get_fallback_sprom)
 		return -EEXIST;
-	fallback_sprom = sprom;
+	get_fallback_sprom = sprom_callback;
 
 	return 0;
 }
 
-const struct ssb_sprom *ssb_get_fallback_sprom(void)
+int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out)
 {
-	return fallback_sprom;
+	if (!get_fallback_sprom)
+		return -ENOENT;
+
+	return get_fallback_sprom(bus, out);
 }
 
 /* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 0331139a726f..77653014db0b 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -171,7 +171,8 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus,
 			     const char *buf, size_t count,
 			     int (*sprom_check_crc)(const u16 *sprom, size_t size),
 			     int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom));
-extern const struct ssb_sprom *ssb_get_fallback_sprom(void);
+extern int ssb_fill_sprom_with_fallback(struct ssb_bus *bus,
+					struct ssb_sprom *out);
 
 
 /* core.c */
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index dca4a0bb6ca9..e3786f161bc3 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -131,8 +131,6 @@ source "drivers/staging/wlags49_h2/Kconfig"
 
 source "drivers/staging/wlags49_h25/Kconfig"
 
-source "drivers/staging/samsung-laptop/Kconfig"
-
 source "drivers/staging/sm7xx/Kconfig"
 
 source "drivers/staging/dt3155v4l/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index eb93012b6f59..f0d5c5315612 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_XVMALLOC)		+= zram/
 obj-$(CONFIG_ZCACHE)		+= zcache/
 obj-$(CONFIG_WLAGS49_H2)	+= wlags49_h2/
 obj-$(CONFIG_WLAGS49_H25)	+= wlags49_h25/
-obj-$(CONFIG_SAMSUNG_LAPTOP)	+= samsung-laptop/
 obj-$(CONFIG_FB_SM7XX)		+= sm7xx/
 obj-$(CONFIG_VIDEO_DT3155)	+= dt3155v4l/
 obj-$(CONFIG_CRYSTALHD)		+= crystalhd/
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
index eeb7dd43f9a8..830822f86e41 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
@@ -2288,7 +2288,3 @@ err_dev:
 	free_netdev(dev);
 	return NULL;
 }
-
-EXPORT_SYMBOL(init_ft1000_card);
-EXPORT_SYMBOL(stop_ft1000_card);
-EXPORT_SYMBOL(flarion_ft1000_cnt);
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c
index 935608e72007..bdfb1aec58df 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c
@@ -214,6 +214,3 @@ void ft1000CleanupProc(struct net_device *dev)
 	remove_proc_entry(FT1000_PROC, init_net.proc_net);
 	unregister_netdevice_notifier(&ft1000_netdev_notifier);
 }
-
-EXPORT_SYMBOL(ft1000InitProc);
-EXPORT_SYMBOL(ft1000CleanupProc);
diff --git a/drivers/staging/gma500/Kconfig b/drivers/staging/gma500/Kconfig
index 5501eb9b3355..ce8bedaeaac2 100644
--- a/drivers/staging/gma500/Kconfig
+++ b/drivers/staging/gma500/Kconfig
@@ -1,6 +1,6 @@
 config DRM_PSB
 	tristate "Intel GMA500 KMS Framebuffer"
-	depends on DRM && PCI
+	depends on DRM && PCI && X86
 	select FB_CFB_COPYAREA
         select FB_CFB_FILLRECT
         select FB_CFB_IMAGEBLIT
diff --git a/drivers/staging/intel_sst/intelmid_v1_control.c b/drivers/staging/intel_sst/intelmid_v1_control.c
index 9cc15c1c18d4..1ea814218059 100644
--- a/drivers/staging/intel_sst/intelmid_v1_control.c
+++ b/drivers/staging/intel_sst/intelmid_v1_control.c
@@ -28,6 +28,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/pci.h>
+#include <linux/delay.h>
 #include <linux/file.h>
 #include <asm/mrst.h>
 #include <sound/pcm.h>
diff --git a/drivers/staging/intel_sst/intelmid_v2_control.c b/drivers/staging/intel_sst/intelmid_v2_control.c
index 26d815a67eb8..3c6b3abff3c3 100644
--- a/drivers/staging/intel_sst/intelmid_v2_control.c
+++ b/drivers/staging/intel_sst/intelmid_v2_control.c
@@ -29,6 +29,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/pci.h>
+#include <linux/delay.h>
 #include <linux/file.h>
 #include "intel_sst.h"
 #include "intelmid_snd_control.h"
diff --git a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
index b5d21f6497f9..22c04eabed41 100644
--- a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
+++ b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
@@ -12,6 +12,7 @@
  */
 #include <linux/cs5535.h>
 #include <linux/gpio.h>
+#include <linux/delay.h>
 #include <asm/olpc.h>
 
 #include "olpc_dcon.h"
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index c93ef207b0b4..c0f0ac7c1cdb 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/writeback.h>
+#include <linux/prefetch.h>
 
 #include "netfs.h"
 
diff --git a/drivers/staging/rt2860/common/cmm_data_pci.c b/drivers/staging/rt2860/common/cmm_data_pci.c
index bef0bbd8cef7..f01a51c381f1 100644
--- a/drivers/staging/rt2860/common/cmm_data_pci.c
+++ b/drivers/staging/rt2860/common/cmm_data_pci.c
@@ -444,7 +444,7 @@ int RTMPCheckRxError(struct rt_rtmp_adapter *pAd,
 			return (NDIS_STATUS_FAILURE);
 		}
 	}
-	/* Drop not U2M frames, can't's drop here because we will drop beacon in this case */
+	/* Would drop non-U2M frames, but can't do it here because beacons would be dropped too */
 	/* I am kind of doubting the U2M bit operation */
 	/* if (pRxD->U2M == 0) */
 	/*      return(NDIS_STATUS_FAILURE); */
diff --git a/drivers/staging/rt2860/common/cmm_data_usb.c b/drivers/staging/rt2860/common/cmm_data_usb.c
index 5637857ae9eb..83a62faa7e57 100644
--- a/drivers/staging/rt2860/common/cmm_data_usb.c
+++ b/drivers/staging/rt2860/common/cmm_data_usb.c
@@ -860,7 +860,7 @@ int RTMPCheckRxError(struct rt_rtmp_adapter *pAd,
 		DBGPRINT_RAW(RT_DEBUG_ERROR, ("received packet too long\n"));
 		return NDIS_STATUS_FAILURE;
 	}
-	/* Drop not U2M frames, can't's drop here because we will drop beacon in this case */
+	/* Would drop non-U2M frames, but can't do it here because beacons would be dropped too */
 	/* I am kind of doubting the U2M bit operation */
 	/* if (pRxD->U2M == 0) */
 	/*      return(NDIS_STATUS_FAILURE); */
diff --git a/drivers/staging/rts_pstor/debug.h b/drivers/staging/rts_pstor/debug.h
index e1408b0e7ae4..ab305be96fb5 100644
--- a/drivers/staging/rts_pstor/debug.h
+++ b/drivers/staging/rts_pstor/debug.h
@@ -28,7 +28,7 @@
 
 #define RTSX_STOR "rts_pstor: "
 
-#if CONFIG_RTS_PSTOR_DEBUG
+#ifdef CONFIG_RTS_PSTOR_DEBUG
 #define RTSX_DEBUGP(x...) printk(KERN_DEBUG RTSX_STOR x)
 #define RTSX_DEBUGPN(x...) printk(KERN_DEBUG x)
 #define RTSX_DEBUGPX(x...) printk(x)
diff --git a/drivers/staging/rts_pstor/ms.c b/drivers/staging/rts_pstor/ms.c
index 810e170894f5..d89795c6a3ac 100644
--- a/drivers/staging/rts_pstor/ms.c
+++ b/drivers/staging/rts_pstor/ms.c
@@ -23,6 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include "rtsx.h"
 #include "rtsx_transport.h"
diff --git a/drivers/staging/rts_pstor/rtsx_chip.c b/drivers/staging/rts_pstor/rtsx_chip.c
index d2f1c715a684..4e60780ea804 100644
--- a/drivers/staging/rts_pstor/rtsx_chip.c
+++ b/drivers/staging/rts_pstor/rtsx_chip.c
@@ -24,6 +24,7 @@
 #include <linux/kthread.h>
 #include <linux/sched.h>
 #include <linux/workqueue.h>
+#include <linux/vmalloc.h>
 
 #include "rtsx.h"
 #include "rtsx_transport.h"
@@ -1311,11 +1312,11 @@ void rtsx_polling_func(struct rtsx_chip *chip)
 
 #ifdef SUPPORT_OCP
 	if (CHECK_LUN_MODE(chip, SD_MS_2LUN)) {
-		#if CONFIG_RTS_PSTOR_DEBUG
+#ifdef CONFIG_RTS_PSTOR_DEBUG
 		if (chip->ocp_stat & (SD_OC_NOW | SD_OC_EVER | MS_OC_NOW | MS_OC_EVER)) {
 			RTSX_DEBUGP("Over current, OCPSTAT is 0x%x\n", chip->ocp_stat);
 		}
-		#endif
+#endif
 
 		if (chip->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) {
 			if (chip->card_exist & SD_CARD) {
diff --git a/drivers/staging/rts_pstor/rtsx_scsi.c b/drivers/staging/rts_pstor/rtsx_scsi.c
index 20c2464a20f9..7de1fae443fc 100644
--- a/drivers/staging/rts_pstor/rtsx_scsi.c
+++ b/drivers/staging/rts_pstor/rtsx_scsi.c
@@ -23,6 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include "rtsx.h"
 #include "rtsx_transport.h"
diff --git a/drivers/staging/rts_pstor/sd.c b/drivers/staging/rts_pstor/sd.c
index 8d066bd428c4..b1277a6c7a8b 100644
--- a/drivers/staging/rts_pstor/sd.c
+++ b/drivers/staging/rts_pstor/sd.c
@@ -909,7 +909,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
 		RTSX_WRITE_REG(chip, SD_VPCLK0_CTL, PHASE_NOT_RESET, PHASE_NOT_RESET);
 		RTSX_WRITE_REG(chip, CLK_CTL, CHANGE_CLK, 0);
 	} else {
-#if CONFIG_RTS_PSTOR_DEBUG
+#ifdef CONFIG_RTS_PSTOR_DEBUG
 		rtsx_read_register(chip, SD_VP_CTL, &val);
 		RTSX_DEBUGP("SD_VP_CTL: 0x%x\n", val);
 		rtsx_read_register(chip, SD_DCMPS_CTL, &val);
@@ -958,7 +958,7 @@ static int sd_change_phase(struct rtsx_chip *chip, u8 sample_point, u8 tune_dir)
 	return STATUS_SUCCESS;
 
 Fail:
-#if CONFIG_RTS_PSTOR_DEBUG
+#ifdef CONFIG_RTS_PSTOR_DEBUG
 	rtsx_read_register(chip, SD_VP_CTL, &val);
 	RTSX_DEBUGP("SD_VP_CTL: 0x%x\n", val);
 	rtsx_read_register(chip, SD_DCMPS_CTL, &val);
diff --git a/drivers/staging/rts_pstor/trace.h b/drivers/staging/rts_pstor/trace.h
index 2c668bae6ff4..bc83b49a4eb4 100644
--- a/drivers/staging/rts_pstor/trace.h
+++ b/drivers/staging/rts_pstor/trace.h
@@ -82,7 +82,7 @@ do {													\
 #define TRACE_GOTO(chip, label)	goto label
 #endif
 
-#if CONFIG_RTS_PSTOR_DEBUG
+#ifdef CONFIG_RTS_PSTOR_DEBUG
 static inline void rtsx_dump(u8 *buf, int buf_len)
 {
 	int i;
diff --git a/drivers/staging/rts_pstor/xd.c b/drivers/staging/rts_pstor/xd.c
index 7bcd468b8f2c..9f3add1e8f59 100644
--- a/drivers/staging/rts_pstor/xd.c
+++ b/drivers/staging/rts_pstor/xd.c
@@ -23,6 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include "rtsx.h"
 #include "rtsx_transport.h"
diff --git a/drivers/staging/samsung-laptop/Kconfig b/drivers/staging/samsung-laptop/Kconfig
deleted file mode 100644
index f27c60864c26..000000000000
--- a/drivers/staging/samsung-laptop/Kconfig
+++ /dev/null
@@ -1,10 +0,0 @@
-config SAMSUNG_LAPTOP
-	tristate "Samsung Laptop driver"
-	default n
-	depends on RFKILL && BACKLIGHT_CLASS_DEVICE && X86
-	help
-	  This module implements a driver for the N128 Samsung Laptop
-	  providing control over the Wireless LED and the LCD backlight
-
-	  To compile this driver as a module, choose
-	  M here: the module will be called samsung-laptop.
diff --git a/drivers/staging/samsung-laptop/Makefile b/drivers/staging/samsung-laptop/Makefile
deleted file mode 100644
index 3c6f42045211..000000000000
--- a/drivers/staging/samsung-laptop/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_SAMSUNG_LAPTOP)	+= samsung-laptop.o
diff --git a/drivers/staging/samsung-laptop/TODO b/drivers/staging/samsung-laptop/TODO
deleted file mode 100644
index f7a6d589916e..000000000000
--- a/drivers/staging/samsung-laptop/TODO
+++ /dev/null
@@ -1,5 +0,0 @@
-TODO:
-	- review from other developers
-	- figure out ACPI video issues
-
-Please send patches to Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/drivers/staging/samsung-laptop/samsung-laptop.c b/drivers/staging/samsung-laptop/samsung-laptop.c
deleted file mode 100644
index 25294462b8b6..000000000000
--- a/drivers/staging/samsung-laptop/samsung-laptop.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * Samsung Laptop driver
- *
- * Copyright (C) 2009,2011 Greg Kroah-Hartman (gregkh@suse.de)
- * Copyright (C) 2009,2011 Novell Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/backlight.h>
-#include <linux/fb.h>
-#include <linux/dmi.h>
-#include <linux/platform_device.h>
-#include <linux/rfkill.h>
-
-/*
- * This driver is needed because a number of Samsung laptops do not hook
- * their control settings through ACPI.  So we have to poke around in the
- * BIOS to do things like brightness values, and "special" key controls.
- */
-
-/*
- * We have 0 - 8 as valid brightness levels.  The specs say that level 0 should
- * be reserved by the BIOS (which really doesn't make much sense), we tell
- * userspace that the value is 0 - 7 and then just tell the hardware 1 - 8
- */
-#define MAX_BRIGHT	0x07
-
-
-#define SABI_IFACE_MAIN			0x00
-#define SABI_IFACE_SUB			0x02
-#define SABI_IFACE_COMPLETE		0x04
-#define SABI_IFACE_DATA			0x05
-
-/* Structure to get data back to the calling function */
-struct sabi_retval {
-	u8 retval[20];
-};
-
-struct sabi_header_offsets {
-	u8 port;
-	u8 re_mem;
-	u8 iface_func;
-	u8 en_mem;
-	u8 data_offset;
-	u8 data_segment;
-};
-
-struct sabi_commands {
-	/*
-	 * Brightness is 0 - 8, as described above.
-	 * Value 0 is for the BIOS to use
-	 */
-	u8 get_brightness;
-	u8 set_brightness;
-
-	/*
-	 * first byte:
-	 * 0x00 - wireless is off
-	 * 0x01 - wireless is on
-	 * second byte:
-	 * 0x02 - 3G is off
-	 * 0x03 - 3G is on
-	 * TODO, verify 3G is correct, that doesn't seem right...
-	 */
-	u8 get_wireless_button;
-	u8 set_wireless_button;
-
-	/* 0 is off, 1 is on */
-	u8 get_backlight;
-	u8 set_backlight;
-
-	/*
-	 * 0x80 or 0x00 - no action
-	 * 0x81 - recovery key pressed
-	 */
-	u8 get_recovery_mode;
-	u8 set_recovery_mode;
-
-	/*
-	 * on seclinux: 0 is low, 1 is high,
-	 * on swsmi: 0 is normal, 1 is silent, 2 is turbo
-	 */
-	u8 get_performance_level;
-	u8 set_performance_level;
-
-	/*
-	 * Tell the BIOS that Linux is running on this machine.
-	 * 81 is on, 80 is off
-	 */
-	u8 set_linux;
-};
-
-struct sabi_performance_level {
-	const char *name;
-	u8 value;
-};
-
-struct sabi_config {
-	const char *test_string;
-	u16 main_function;
-	const struct sabi_header_offsets header_offsets;
-	const struct sabi_commands commands;
-	const struct sabi_performance_level performance_levels[4];
-	u8 min_brightness;
-	u8 max_brightness;
-};
-
-static const struct sabi_config sabi_configs[] = {
-	{
-		.test_string = "SECLINUX",
-
-		.main_function = 0x4c49,
-
-		.header_offsets = {
-			.port = 0x00,
-			.re_mem = 0x02,
-			.iface_func = 0x03,
-			.en_mem = 0x04,
-			.data_offset = 0x05,
-			.data_segment = 0x07,
-		},
-
-		.commands = {
-			.get_brightness = 0x00,
-			.set_brightness = 0x01,
-
-			.get_wireless_button = 0x02,
-			.set_wireless_button = 0x03,
-
-			.get_backlight = 0x04,
-			.set_backlight = 0x05,
-
-			.get_recovery_mode = 0x06,
-			.set_recovery_mode = 0x07,
-
-			.get_performance_level = 0x08,
-			.set_performance_level = 0x09,
-
-			.set_linux = 0x0a,
-		},
-
-		.performance_levels = {
-			{
-				.name = "silent",
-				.value = 0,
-			},
-			{
-				.name = "normal",
-				.value = 1,
-			},
-			{ },
-		},
-		.min_brightness = 1,
-		.max_brightness = 8,
-	},
-	{
-		.test_string = "SwSmi@",
-
-		.main_function = 0x5843,
-
-		.header_offsets = {
-			.port = 0x00,
-			.re_mem = 0x04,
-			.iface_func = 0x02,
-			.en_mem = 0x03,
-			.data_offset = 0x05,
-			.data_segment = 0x07,
-		},
-
-		.commands = {
-			.get_brightness = 0x10,
-			.set_brightness = 0x11,
-
-			.get_wireless_button = 0x12,
-			.set_wireless_button = 0x13,
-
-			.get_backlight = 0x2d,
-			.set_backlight = 0x2e,
-
-			.get_recovery_mode = 0xff,
-			.set_recovery_mode = 0xff,
-
-			.get_performance_level = 0x31,
-			.set_performance_level = 0x32,
-
-			.set_linux = 0xff,
-		},
-
-		.performance_levels = {
-			{
-				.name = "normal",
-				.value = 0,
-			},
-			{
-				.name = "silent",
-				.value = 1,
-			},
-			{
-				.name = "overclock",
-				.value = 2,
-			},
-			{ },
-		},
-		.min_brightness = 0,
-		.max_brightness = 8,
-	},
-	{ },
-};
-
-static const struct sabi_config *sabi_config;
-
-static void __iomem *sabi;
-static void __iomem *sabi_iface;
-static void __iomem *f0000_segment;
-static struct backlight_device *backlight_device;
-static struct mutex sabi_mutex;
-static struct platform_device *sdev;
-static struct rfkill *rfk;
-
-static int force;
-module_param(force, bool, 0);
-MODULE_PARM_DESC(force,
-		"Disable the DMI check and forces the driver to be loaded");
-
-static int debug;
-module_param(debug, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Debug enabled or not");
-
-static int sabi_get_command(u8 command, struct sabi_retval *sretval)
-{
-	int retval = 0;
-	u16 port = readw(sabi + sabi_config->header_offsets.port);
-	u8 complete, iface_data;
-
-	mutex_lock(&sabi_mutex);
-
-	/* enable memory to be able to write to it */
-	outb(readb(sabi + sabi_config->header_offsets.en_mem), port);
-
-	/* write out the command */
-	writew(sabi_config->main_function, sabi_iface + SABI_IFACE_MAIN);
-	writew(command, sabi_iface + SABI_IFACE_SUB);
-	writeb(0, sabi_iface + SABI_IFACE_COMPLETE);
-	outb(readb(sabi + sabi_config->header_offsets.iface_func), port);
-
-	/* write protect memory to make it safe */
-	outb(readb(sabi + sabi_config->header_offsets.re_mem), port);
-
-	/* see if the command actually succeeded */
-	complete = readb(sabi_iface + SABI_IFACE_COMPLETE);
-	iface_data = readb(sabi_iface + SABI_IFACE_DATA);
-	if (complete != 0xaa || iface_data == 0xff) {
-		pr_warn("SABI get command 0x%02x failed with completion flag 0x%02x and data 0x%02x\n",
-		        command, complete, iface_data);
-		retval = -EINVAL;
-		goto exit;
-	}
-	/*
-	 * Save off the data into a structure so the caller use it.
-	 * Right now we only want the first 4 bytes,
-	 * There are commands that need more, but not for the ones we
-	 * currently care about.
-	 */
-	sretval->retval[0] = readb(sabi_iface + SABI_IFACE_DATA);
-	sretval->retval[1] = readb(sabi_iface + SABI_IFACE_DATA + 1);
-	sretval->retval[2] = readb(sabi_iface + SABI_IFACE_DATA + 2);
-	sretval->retval[3] = readb(sabi_iface + SABI_IFACE_DATA + 3);
-
-exit:
-	mutex_unlock(&sabi_mutex);
-	return retval;
-
-}
-
-static int sabi_set_command(u8 command, u8 data)
-{
-	int retval = 0;
-	u16 port = readw(sabi + sabi_config->header_offsets.port);
-	u8 complete, iface_data;
-
-	mutex_lock(&sabi_mutex);
-
-	/* enable memory to be able to write to it */
-	outb(readb(sabi + sabi_config->header_offsets.en_mem), port);
-
-	/* write out the command */
-	writew(sabi_config->main_function, sabi_iface + SABI_IFACE_MAIN);
-	writew(command, sabi_iface + SABI_IFACE_SUB);
-	writeb(0, sabi_iface + SABI_IFACE_COMPLETE);
-	writeb(data, sabi_iface + SABI_IFACE_DATA);
-	outb(readb(sabi + sabi_config->header_offsets.iface_func), port);
-
-	/* write protect memory to make it safe */
-	outb(readb(sabi + sabi_config->header_offsets.re_mem), port);
-
-	/* see if the command actually succeeded */
-	complete = readb(sabi_iface + SABI_IFACE_COMPLETE);
-	iface_data = readb(sabi_iface + SABI_IFACE_DATA);
-	if (complete != 0xaa || iface_data == 0xff) {
-		pr_warn("SABI set command 0x%02x failed with completion flag 0x%02x and data 0x%02x\n",
-		       command, complete, iface_data);
-		retval = -EINVAL;
-	}
-
-	mutex_unlock(&sabi_mutex);
-	return retval;
-}
-
-static void test_backlight(void)
-{
-	struct sabi_retval sretval;
-
-	sabi_get_command(sabi_config->commands.get_backlight, &sretval);
-	printk(KERN_DEBUG "backlight = 0x%02x\n", sretval.retval[0]);
-
-	sabi_set_command(sabi_config->commands.set_backlight, 0);
-	printk(KERN_DEBUG "backlight should be off\n");
-
-	sabi_get_command(sabi_config->commands.get_backlight, &sretval);
-	printk(KERN_DEBUG "backlight = 0x%02x\n", sretval.retval[0]);
-
-	msleep(1000);
-
-	sabi_set_command(sabi_config->commands.set_backlight, 1);
-	printk(KERN_DEBUG "backlight should be on\n");
-
-	sabi_get_command(sabi_config->commands.get_backlight, &sretval);
-	printk(KERN_DEBUG "backlight = 0x%02x\n", sretval.retval[0]);
-}
-
-static void test_wireless(void)
-{
-	struct sabi_retval sretval;
-
-	sabi_get_command(sabi_config->commands.get_wireless_button, &sretval);
-	printk(KERN_DEBUG "wireless led = 0x%02x\n", sretval.retval[0]);
-
-	sabi_set_command(sabi_config->commands.set_wireless_button, 0);
-	printk(KERN_DEBUG "wireless led should be off\n");
-
-	sabi_get_command(sabi_config->commands.get_wireless_button, &sretval);
-	printk(KERN_DEBUG "wireless led = 0x%02x\n", sretval.retval[0]);
-
-	msleep(1000);
-
-	sabi_set_command(sabi_config->commands.set_wireless_button, 1);
-	printk(KERN_DEBUG "wireless led should be on\n");
-
-	sabi_get_command(sabi_config->commands.get_wireless_button, &sretval);
-	printk(KERN_DEBUG "wireless led = 0x%02x\n", sretval.retval[0]);
-}
-
-static u8 read_brightness(void)
-{
-	struct sabi_retval sretval;
-	int user_brightness = 0;
-	int retval;
-
-	retval = sabi_get_command(sabi_config->commands.get_brightness,
-				  &sretval);
-	if (!retval) {
-		user_brightness = sretval.retval[0];
-		if (user_brightness != 0)
-			user_brightness -= sabi_config->min_brightness;
-	}
-	return user_brightness;
-}
-
-static void set_brightness(u8 user_brightness)
-{
-	u8 user_level = user_brightness - sabi_config->min_brightness;
-
-	sabi_set_command(sabi_config->commands.set_brightness, user_level);
-}
-
-static int get_brightness(struct backlight_device *bd)
-{
-	return (int)read_brightness();
-}
-
-static int update_status(struct backlight_device *bd)
-{
-	set_brightness(bd->props.brightness);
-
-	if (bd->props.power == FB_BLANK_UNBLANK)
-		sabi_set_command(sabi_config->commands.set_backlight, 1);
-	else
-		sabi_set_command(sabi_config->commands.set_backlight, 0);
-	return 0;
-}
-
-static const struct backlight_ops backlight_ops = {
-	.get_brightness	= get_brightness,
-	.update_status	= update_status,
-};
-
-static int rfkill_set(void *data, bool blocked)
-{
-	/* Do something with blocked...*/
-	/*
-	 * blocked == false is on
-	 * blocked == true is off
-	 */
-	if (blocked)
-		sabi_set_command(sabi_config->commands.set_wireless_button, 0);
-	else
-		sabi_set_command(sabi_config->commands.set_wireless_button, 1);
-
-	return 0;
-}
-
-static struct rfkill_ops rfkill_ops = {
-	.set_block = rfkill_set,
-};
-
-static int init_wireless(struct platform_device *sdev)
-{
-	int retval;
-
-	rfk = rfkill_alloc("samsung-wifi", &sdev->dev, RFKILL_TYPE_WLAN,
-			   &rfkill_ops, NULL);
-	if (!rfk)
-		return -ENOMEM;
-
-	retval = rfkill_register(rfk);
-	if (retval) {
-		rfkill_destroy(rfk);
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-static void destroy_wireless(void)
-{
-	rfkill_unregister(rfk);
-	rfkill_destroy(rfk);
-}
-
-static ssize_t get_performance_level(struct device *dev,
-				     struct device_attribute *attr, char *buf)
-{
-	struct sabi_retval sretval;
-	int retval;
-	int i;
-
-	/* Read the state */
-	retval = sabi_get_command(sabi_config->commands.get_performance_level,
-				  &sretval);
-	if (retval)
-		return retval;
-
-	/* The logic is backwards, yeah, lots of fun... */
-	for (i = 0; sabi_config->performance_levels[i].name; ++i) {
-		if (sretval.retval[0] == sabi_config->performance_levels[i].value)
-			return sprintf(buf, "%s\n", sabi_config->performance_levels[i].name);
-	}
-	return sprintf(buf, "%s\n", "unknown");
-}
-
-static ssize_t set_performance_level(struct device *dev,
-				struct device_attribute *attr, const char *buf,
-				size_t count)
-{
-	if (count >= 1) {
-		int i;
-		for (i = 0; sabi_config->performance_levels[i].name; ++i) {
-			const struct sabi_performance_level *level =
-				&sabi_config->performance_levels[i];
-			if (!strncasecmp(level->name, buf, strlen(level->name))) {
-				sabi_set_command(sabi_config->commands.set_performance_level,
-						 level->value);
-				break;
-			}
-		}
-		if (!sabi_config->performance_levels[i].name)
-			return -EINVAL;
-	}
-	return count;
-}
-static DEVICE_ATTR(performance_level, S_IWUSR | S_IRUGO,
-		   get_performance_level, set_performance_level);
-
-
-static int __init dmi_check_cb(const struct dmi_system_id *id)
-{
-	pr_info("found laptop model '%s'\n",
-		id->ident);
-	return 0;
-}
-
-static struct dmi_system_id __initdata samsung_dmi_table[] = {
-	{
-		.ident = "N128",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N128"),
-			DMI_MATCH(DMI_BOARD_NAME, "N128"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "N130",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N130"),
-			DMI_MATCH(DMI_BOARD_NAME, "N130"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "X125",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X125"),
-			DMI_MATCH(DMI_BOARD_NAME, "X125"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "X120/X170",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X120/X170"),
-			DMI_MATCH(DMI_BOARD_NAME, "X120/X170"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "NC10",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "NC10"),
-			DMI_MATCH(DMI_BOARD_NAME, "NC10"),
-		},
-		.callback = dmi_check_cb,
-	},
-		{
-		.ident = "NP-Q45",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"),
-			DMI_MATCH(DMI_BOARD_NAME, "SQ45S70S"),
-		},
-		.callback = dmi_check_cb,
-		},
-	{
-		.ident = "X360",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X360"),
-			DMI_MATCH(DMI_BOARD_NAME, "X360"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "R410 Plus",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "R410P"),
-			DMI_MATCH(DMI_BOARD_NAME, "R460"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "R518",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "R518"),
-			DMI_MATCH(DMI_BOARD_NAME, "R518"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "R519/R719",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "R519/R719"),
-			DMI_MATCH(DMI_BOARD_NAME, "R519/R719"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "N150/N210/N220/N230",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220/N230"),
-			DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220/N230"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "N150P/N210P/N220P",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N150P/N210P/N220P"),
-			DMI_MATCH(DMI_BOARD_NAME, "N150P/N210P/N220P"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "R530/R730",
-		.matches = {
-		      DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
-		      DMI_MATCH(DMI_PRODUCT_NAME, "R530/R730"),
-		      DMI_MATCH(DMI_BOARD_NAME, "R530/R730"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "NF110/NF210/NF310",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "NF110/NF210/NF310"),
-			DMI_MATCH(DMI_BOARD_NAME, "NF110/NF210/NF310"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "N145P/N250P/N260P",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "N145P/N250P/N260P"),
-			DMI_MATCH(DMI_BOARD_NAME, "N145P/N250P/N260P"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "R70/R71",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR,
-					"SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "R70/R71"),
-			DMI_MATCH(DMI_BOARD_NAME, "R70/R71"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{
-		.ident = "P460",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "P460"),
-			DMI_MATCH(DMI_BOARD_NAME, "P460"),
-		},
-		.callback = dmi_check_cb,
-	},
-	{ },
-};
-MODULE_DEVICE_TABLE(dmi, samsung_dmi_table);
-
-static int find_signature(void __iomem *memcheck, const char *testStr)
-{
-	int i = 0;
-	int loca;
-
-	for (loca = 0; loca < 0xffff; loca++) {
-		char temp = readb(memcheck + loca);
-
-		if (temp == testStr[i]) {
-			if (i == strlen(testStr)-1)
-				break;
-			++i;
-		} else {
-			i = 0;
-		}
-	}
-	return loca;
-}
-
-static int __init samsung_init(void)
-{
-	struct backlight_properties props;
-	struct sabi_retval sretval;
-	unsigned int ifaceP;
-	int i;
-	int loca;
-	int retval;
-
-	mutex_init(&sabi_mutex);
-
-	if (!force && !dmi_check_system(samsung_dmi_table))
-		return -ENODEV;
-
-	f0000_segment = ioremap_nocache(0xf0000, 0xffff);
-	if (!f0000_segment) {
-		pr_err("Can't map the segment at 0xf0000\n");
-		return -EINVAL;
-	}
-
-	/* Try to find one of the signatures in memory to find the header */
-	for (i = 0; sabi_configs[i].test_string != 0; ++i) {
-		sabi_config = &sabi_configs[i];
-		loca = find_signature(f0000_segment, sabi_config->test_string);
-		if (loca != 0xffff)
-			break;
-	}
-
-	if (loca == 0xffff) {
-		pr_err("This computer does not support SABI\n");
-		goto error_no_signature;
-	}
-
-	/* point to the SMI port Number */
-	loca += 1;
-	sabi = (f0000_segment + loca);
-
-	if (debug) {
-		printk(KERN_DEBUG "This computer supports SABI==%x\n",
-			loca + 0xf0000 - 6);
-		printk(KERN_DEBUG "SABI header:\n");
-		printk(KERN_DEBUG " SMI Port Number = 0x%04x\n",
-			readw(sabi + sabi_config->header_offsets.port));
-		printk(KERN_DEBUG " SMI Interface Function = 0x%02x\n",
-			readb(sabi + sabi_config->header_offsets.iface_func));
-		printk(KERN_DEBUG " SMI enable memory buffer = 0x%02x\n",
-			readb(sabi + sabi_config->header_offsets.en_mem));
-		printk(KERN_DEBUG " SMI restore memory buffer = 0x%02x\n",
-			readb(sabi + sabi_config->header_offsets.re_mem));
-		printk(KERN_DEBUG " SABI data offset = 0x%04x\n",
-			readw(sabi + sabi_config->header_offsets.data_offset));
-		printk(KERN_DEBUG " SABI data segment = 0x%04x\n",
-			readw(sabi + sabi_config->header_offsets.data_segment));
-	}
-
-	/* Get a pointer to the SABI Interface */
-	ifaceP = (readw(sabi + sabi_config->header_offsets.data_segment) & 0x0ffff) << 4;
-	ifaceP += readw(sabi + sabi_config->header_offsets.data_offset) & 0x0ffff;
-	sabi_iface = ioremap_nocache(ifaceP, 16);
-	if (!sabi_iface) {
-		pr_err("Can't remap %x\n", ifaceP);
-		goto exit;
-	}
-	if (debug) {
-		printk(KERN_DEBUG "ifaceP = 0x%08x\n", ifaceP);
-		printk(KERN_DEBUG "sabi_iface = %p\n", sabi_iface);
-
-		test_backlight();
-		test_wireless();
-
-		retval = sabi_get_command(sabi_config->commands.get_brightness,
-					  &sretval);
-		printk(KERN_DEBUG "brightness = 0x%02x\n", sretval.retval[0]);
-	}
-
-	/* Turn on "Linux" mode in the BIOS */
-	if (sabi_config->commands.set_linux != 0xff) {
-		retval = sabi_set_command(sabi_config->commands.set_linux,
-					  0x81);
-		if (retval) {
-			pr_warn("Linux mode was not set!\n");
-			goto error_no_platform;
-		}
-	}
-
-	/* knock up a platform device to hang stuff off of */
-	sdev = platform_device_register_simple("samsung", -1, NULL, 0);
-	if (IS_ERR(sdev))
-		goto error_no_platform;
-
-	/* create a backlight device to talk to this one */
-	memset(&props, 0, sizeof(struct backlight_properties));
-	props.type = BACKLIGHT_PLATFORM;
-	props.max_brightness = sabi_config->max_brightness;
-	backlight_device = backlight_device_register("samsung", &sdev->dev,
-						     NULL, &backlight_ops,
-						     &props);
-	if (IS_ERR(backlight_device))
-		goto error_no_backlight;
-
-	backlight_device->props.brightness = read_brightness();
-	backlight_device->props.power = FB_BLANK_UNBLANK;
-	backlight_update_status(backlight_device);
-
-	retval = init_wireless(sdev);
-	if (retval)
-		goto error_no_rfk;
-
-	retval = device_create_file(&sdev->dev, &dev_attr_performance_level);
-	if (retval)
-		goto error_file_create;
-
-exit:
-	return 0;
-
-error_file_create:
-	destroy_wireless();
-
-error_no_rfk:
-	backlight_device_unregister(backlight_device);
-
-error_no_backlight:
-	platform_device_unregister(sdev);
-
-error_no_platform:
-	iounmap(sabi_iface);
-
-error_no_signature:
-	iounmap(f0000_segment);
-	return -EINVAL;
-}
-
-static void __exit samsung_exit(void)
-{
-	/* Turn off "Linux" mode in the BIOS */
-	if (sabi_config->commands.set_linux != 0xff)
-		sabi_set_command(sabi_config->commands.set_linux, 0x80);
-
-	device_remove_file(&sdev->dev, &dev_attr_performance_level);
-	backlight_device_unregister(backlight_device);
-	destroy_wireless();
-	iounmap(sabi_iface);
-	iounmap(f0000_segment);
-	platform_device_unregister(sdev);
-}
-
-module_init(samsung_init);
-module_exit(samsung_exit);
-
-MODULE_AUTHOR("Greg Kroah-Hartman <gregkh@suse.de>");
-MODULE_DESCRIPTION("Samsung Backlight driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/solo6x10/Kconfig b/drivers/staging/solo6x10/Kconfig
index 2cf77c940860..03dcac4ea4d0 100644
--- a/drivers/staging/solo6x10/Kconfig
+++ b/drivers/staging/solo6x10/Kconfig
@@ -2,6 +2,7 @@ config SOLO6X10
 	tristate "Softlogic 6x10 MPEG codec cards"
 	depends on PCI && VIDEO_DEV && SND && I2C
 	select VIDEOBUF_DMA_SG
+	select SND_PCM
 	---help---
 	  This driver supports the Softlogic based MPEG-4 and h.264 codec
 	  codec cards.
diff --git a/drivers/staging/spectra/ffsport.c b/drivers/staging/spectra/ffsport.c
index 20dae73d3b78..506547b603e1 100644
--- a/drivers/staging/spectra/ffsport.c
+++ b/drivers/staging/spectra/ffsport.c
@@ -653,7 +653,7 @@ static int SBD_setup_device(struct spectra_nand_dev *dev, int which)
 	}
 	dev->queue->queuedata = dev;
 
-	/* As Linux block layer does't support >4KB hardware sector,  */
+	/* As Linux block layer doesn't support >4KB hardware sector,  */
 	/* Here we force report 512 byte hardware sector size to Kernel */
 	blk_queue_logical_block_size(dev->queue, 512);
 
diff --git a/drivers/staging/tidspbridge/dynload/cload.c b/drivers/staging/tidspbridge/dynload/cload.c
index 5cecd237e3f6..fe1ef0addb09 100644
--- a/drivers/staging/tidspbridge/dynload/cload.c
+++ b/drivers/staging/tidspbridge/dynload/cload.c
@@ -718,7 +718,7 @@ static void dload_symbols(struct dload_state *dlthis)
 	 * as a temporary for .dllview record construction.
 	 * Allocate storage for the whole table.  Add 1 to the section count
 	 * in case a trampoline section is auto-generated as well as the
-	 * size of the trampoline section name so DLLView does't get lost.
+	 * size of the trampoline section name so DLLView doesn't get lost.
 	 */
 
 	siz = sym_count * sizeof(struct local_symbol);
diff --git a/drivers/staging/tty/specialix.c b/drivers/staging/tty/specialix.c
index cb24c6d999db..5c3598ec7456 100644
--- a/drivers/staging/tty/specialix.c
+++ b/drivers/staging/tty/specialix.c
@@ -978,7 +978,7 @@ static void sx_change_speed(struct specialix_board *bp,
 	spin_lock_irqsave(&bp->lock, flags);
 	sx_out(bp, CD186x_CAR, port_No(port));
 
-	/* The Specialix board does't implement the RTS lines.
+	/* The Specialix board doesn't implement the RTS lines.
 	   They are used to set the IRQ level. Don't touch them. */
 	if (sx_crtscts(tty))
 		port->MSVR = MSVR_DTR | (sx_in(bp, CD186x_MSVR) & MSVR_RTS);
diff --git a/drivers/staging/usbip/vhci_hcd.c b/drivers/staging/usbip/vhci_hcd.c
index 0f02a4b12ae4..4f4f13321f40 100644
--- a/drivers/staging/usbip/vhci_hcd.c
+++ b/drivers/staging/usbip/vhci_hcd.c
@@ -876,8 +876,10 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 	}
 
 	/* kill threads related to this sdev, if v.c. exists */
-	kthread_stop(vdev->ud.tcp_rx);
-	kthread_stop(vdev->ud.tcp_tx);
+	if (vdev->ud.tcp_rx)
+		kthread_stop(vdev->ud.tcp_rx);
+	if (vdev->ud.tcp_tx)
+		kthread_stop(vdev->ud.tcp_tx);
 
 	usbip_uinfo("stop threads\n");
 
@@ -949,9 +951,6 @@ static void vhci_device_init(struct vhci_device *vdev)
 {
 	memset(vdev, 0, sizeof(*vdev));
 
-	vdev->ud.tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx");
-	vdev->ud.tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx");
-
 	vdev->ud.side   = USBIP_VHCI;
 	vdev->ud.status = VDEV_ST_NULL;
 	/* vdev->ud.lock   = SPIN_LOCK_UNLOCKED; */
@@ -1139,7 +1138,7 @@ static int vhci_hcd_probe(struct platform_device *pdev)
 		usbip_uerr("create hcd failed\n");
 		return -ENOMEM;
 	}
-
+	hcd->has_tt = 1;
 
 	/* this is private data for vhci_hcd */
 	the_controller = hcd_to_vhci(hcd);
diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c
index 3f2459f30415..e2dadbd5ef1e 100644
--- a/drivers/staging/usbip/vhci_sysfs.c
+++ b/drivers/staging/usbip/vhci_sysfs.c
@@ -21,6 +21,7 @@
 #include "vhci.h"
 
 #include <linux/in.h>
+#include <linux/kthread.h>
 
 /* TODO: refine locking ?*/
 
@@ -220,13 +221,13 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 	vdev->ud.tcp_socket = socket;
 	vdev->ud.status     = VDEV_ST_NOTASSIGNED;
 
-	wake_up_process(vdev->ud.tcp_rx);
-	wake_up_process(vdev->ud.tcp_tx);
-
 	spin_unlock(&vdev->ud.lock);
 	spin_unlock(&the_controller->lock);
 	/* end the lock */
 
+	vdev->ud.tcp_rx = kthread_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
+	vdev->ud.tcp_tx = kthread_run(vhci_tx_loop, &vdev->ud, "vhci_tx");
+
 	rh_port_connect(rhport, speed);
 
 	return count;
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index 6a71f52c59b1..76378397b763 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -273,7 +273,7 @@ exit:
 }
 
 int prism2_set_default_key(struct wiphy *wiphy, struct net_device *dev,
-			   u8 key_index)
+			   u8 key_index, bool unicast, bool multicast)
 {
 	wlandevice_t *wlandev = dev->ml_priv;
 
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 9ef2dbbfa62b..5cb0f0ef6af0 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -30,5 +30,6 @@ config TCM_PSCSI
 	passthrough access to Linux/SCSI device
 
 source "drivers/target/loopback/Kconfig"
+source "drivers/target/tcm_fc/Kconfig"
 
 endif
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 1178bbfc68fe..21df808a992c 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -24,3 +24,5 @@ obj-$(CONFIG_TCM_PSCSI)		+= target_core_pscsi.o
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET)	+= loopback/
+
+obj-$(CONFIG_TCM_FC)		+= tcm_fc/
diff --git a/drivers/target/tcm_fc/Kconfig b/drivers/target/tcm_fc/Kconfig
new file mode 100644
index 000000000000..40caf458e89e
--- /dev/null
+++ b/drivers/target/tcm_fc/Kconfig
@@ -0,0 +1,5 @@
+config TCM_FC
+	tristate "TCM_FC fabric Plugin"
+	depends on LIBFC
+	help
+	  Say Y here to enable the TCM FC plugin for accessing FC fabrics in TCM.
diff --git a/drivers/target/tcm_fc/Makefile b/drivers/target/tcm_fc/Makefile
new file mode 100644
index 000000000000..7a5c2b64cf65
--- /dev/null
+++ b/drivers/target/tcm_fc/Makefile
@@ -0,0 +1,15 @@
+EXTRA_CFLAGS += -I$(srctree)/drivers/target/ \
+		-I$(srctree)/drivers/scsi/ \
+		-I$(srctree)/include/scsi/ \
+		-I$(srctree)/drivers/target/tcm_fc/
+
+tcm_fc-y +=	tfc_cmd.o \
+		tfc_conf.o \
+		tfc_io.o \
+		tfc_sess.o
+
+obj-$(CONFIG_TCM_FC)	+= tcm_fc.o
+
+ifdef CONFIGFS_TCM_FC_DEBUG
+EXTRA_CFLAGS	+= -DTCM_FC_DEBUG
+endif
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
new file mode 100644
index 000000000000..defff32b7880
--- /dev/null
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef __TCM_FC_H__
+#define __TCM_FC_H__
+
+#define FT_VERSION "0.3"
+
+#define FT_NAMELEN 32		/* length of ASCII WWPNs including pad */
+#define FT_TPG_NAMELEN 32	/* max length of TPG name */
+#define FT_LUN_NAMELEN 32	/* max length of LUN name */
+
+/*
+ * Debug options.
+ */
+#define FT_DEBUG_CONF	0x01	/* configuration messages */
+#define	FT_DEBUG_SESS	0x02	/* session messages */
+#define	FT_DEBUG_TM	0x04	/* TM operations */
+#define	FT_DEBUG_IO	0x08	/* I/O commands */
+#define	FT_DEBUG_DATA	0x10	/* Data transfer */
+
+extern unsigned int ft_debug_logging;	/* debug options */
+
+#define FT_DEBUG(mask, fmt, args...)					\
+	do {								\
+		if (ft_debug_logging & (mask))				\
+			printk(KERN_INFO "tcm_fc: %s: " fmt,		\
+				__func__, ##args);			\
+	} while (0)
+
+#define	FT_CONF_DBG(fmt, args...)	FT_DEBUG(FT_DEBUG_CONF, fmt, ##args)
+#define	FT_SESS_DBG(fmt, args...)	FT_DEBUG(FT_DEBUG_SESS, fmt, ##args)
+#define	FT_TM_DBG(fmt, args...)		FT_DEBUG(FT_DEBUG_TM, fmt, ##args)
+#define	FT_IO_DBG(fmt, args...)		FT_DEBUG(FT_DEBUG_IO, fmt, ##args)
+#define	FT_DATA_DBG(fmt, args...)	FT_DEBUG(FT_DEBUG_DATA, fmt, ##args)
+
+struct ft_transport_id {
+	__u8	format;
+	__u8	__resvd1[7];
+	__u8	wwpn[8];
+	__u8	__resvd2[8];
+} __attribute__((__packed__));
+
+/*
+ * Session (remote port).
+ */
+struct ft_sess {
+	u32 port_id;			/* for hash lookup use only */
+	u32 params;
+	u16 max_frame;			/* maximum frame size */
+	u64 port_name;			/* port name for transport ID */
+	struct ft_tport *tport;
+	struct se_session *se_sess;
+	struct hlist_node hash;		/* linkage in ft_sess_hash table */
+	struct rcu_head rcu;
+	struct kref kref;		/* ref for hash and outstanding I/Os */
+};
+
+/*
+ * Hash table of sessions per local port.
+ * Hash lookup by remote port FC_ID.
+ */
+#define	FT_SESS_HASH_BITS	6
+#define	FT_SESS_HASH_SIZE	(1 << FT_SESS_HASH_BITS)
+
+/*
+ * Per local port data.
+ * This is created only after a TPG exists that allows target function
+ * for the local port.  If the TPG exists, this is allocated when
+ * we're notified that the local port has been created, or when
+ * the first PRLI provider callback is received.
+ */
+struct ft_tport {
+	struct fc_lport *lport;
+	struct ft_tpg *tpg;		/* NULL if TPG deleted before tport */
+	u32	sess_count;		/* number of sessions in hash */
+	struct rcu_head rcu;
+	struct hlist_head hash[FT_SESS_HASH_SIZE];	/* list of sessions */
+};
+
+/*
+ * Node ID and authentication.
+ */
+struct ft_node_auth {
+	u64	port_name;
+	u64	node_name;
+};
+
+/*
+ * Node ACL for FC remote port session.
+ */
+struct ft_node_acl {
+	struct ft_node_auth node_auth;
+	struct se_node_acl se_node_acl;
+};
+
+struct ft_lun {
+	u32 index;
+	char name[FT_LUN_NAMELEN];
+};
+
+/*
+ * Target portal group (local port).
+ */
+struct ft_tpg {
+	u32 index;
+	struct ft_lport_acl *lport_acl;
+	struct ft_tport *tport;		/* active tport or NULL */
+	struct list_head list;		/* linkage in ft_lport_acl tpg_list */
+	struct list_head lun_list;	/* head of LUNs */
+	struct se_portal_group se_tpg;
+	struct task_struct *thread;	/* processing thread */
+	struct se_queue_obj qobj;	/* queue for processing thread */
+};
+
+struct ft_lport_acl {
+	u64 wwpn;
+	char name[FT_NAMELEN];
+	struct list_head list;
+	struct list_head tpg_list;
+	struct se_wwn fc_lport_wwn;
+};
+
+enum ft_cmd_state {
+	FC_CMD_ST_NEW = 0,
+	FC_CMD_ST_REJ
+};
+
+/*
+ * Commands
+ */
+struct ft_cmd {
+	enum ft_cmd_state state;	/* FC_CMD_ST_NEW or FC_CMD_ST_REJ */
+	u16 lun;			/* LUN from request */
+	struct ft_sess *sess;		/* session held for cmd */
+	struct fc_seq *seq;		/* sequence in exchange mgr */
+	struct se_cmd se_cmd;		/* Local TCM I/O descriptor */
+	struct fc_frame *req_frame;	/* presumably the frame cdb points into */
+	unsigned char *cdb;		/* pointer to CDB inside frame */
+	u32 write_data_len;		/* data received on writes */
+	struct se_queue_req se_req;
+	/* Local sense buffer */
+	unsigned char ft_sense_buffer[TRANSPORT_SENSE_BUFFER];
+	u32 was_ddp_setup:1;		/* Set only if DDP is set up */
+	struct scatterlist *sg;		/* Set only if DDP is set up */
+	u32 sg_cnt;			/* Number of items in scatterlist */
+};
+
+extern struct list_head ft_lport_list;
+extern struct mutex ft_lport_lock;
+extern struct fc4_prov ft_prov;
+extern struct target_fabric_configfs *ft_configfs;
+
+/*
+ * Fabric methods.
+ */
+
+/*
+ * Session ops.
+ */
+void ft_sess_put(struct ft_sess *);
+int ft_sess_shutdown(struct se_session *);
+void ft_sess_close(struct se_session *);
+void ft_sess_stop(struct se_session *, int, int);
+int ft_sess_logged_in(struct se_session *);
+u32 ft_sess_get_index(struct se_session *);
+u32 ft_sess_get_port_name(struct se_session *, unsigned char *, u32);
+void ft_sess_set_erl0(struct se_session *);
+
+void ft_lport_add(struct fc_lport *, void *);
+void ft_lport_del(struct fc_lport *, void *);
+int ft_lport_notify(struct notifier_block *, unsigned long, void *);
+
+/*
+ * IO methods.
+ */
+void ft_check_stop_free(struct se_cmd *);
+void ft_release_cmd(struct se_cmd *);
+int ft_queue_status(struct se_cmd *);
+int ft_queue_data_in(struct se_cmd *);
+int ft_write_pending(struct se_cmd *);
+int ft_write_pending_status(struct se_cmd *);
+u32 ft_get_task_tag(struct se_cmd *);
+int ft_get_cmd_state(struct se_cmd *);
+void ft_new_cmd_failure(struct se_cmd *);
+int ft_queue_tm_resp(struct se_cmd *);
+int ft_is_state_remove(struct se_cmd *);
+
+/*
+ * other internal functions.
+ */
+int ft_thread(void *);
+void ft_recv_req(struct ft_sess *, struct fc_frame *);
+struct ft_tpg *ft_lport_find_tpg(struct fc_lport *);
+struct ft_node_acl *ft_acl_get(struct ft_tpg *, struct fc_rport_priv *);
+
+void ft_recv_write_data(struct ft_cmd *, struct fc_frame *);
+void ft_dump_cmd(struct ft_cmd *, const char *caller);
+
+ssize_t ft_format_wwn(char *, size_t, u64);
+
+#endif /* __TCM_FC_H__ */
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
new file mode 100644
index 000000000000..49e51778f733
--- /dev/null
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* XXX TBD some includes may be extraneous */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/version.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/configfs.h>
+#include <linux/ctype.h>
+#include <linux/hash.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tmr.h>
+#include <target/configfs_macros.h>
+
+#include "tcm_fc.h"
+
+/*
+ * Dump cmd state for debugging.  Does nothing unless FT_DEBUG_IO is set.
+ */
+void ft_dump_cmd(struct ft_cmd *cmd, const char *caller)
+{
+	struct fc_exch *ep;
+	struct fc_seq *sp;
+	struct se_cmd *se_cmd;
+	struct se_mem *mem;
+	struct se_transport_task *task;
+
+	if (!(ft_debug_logging & FT_DEBUG_IO))
+		return;
+
+	se_cmd = &cmd->se_cmd;
+	printk(KERN_INFO "%s: cmd %p state %d sess %p seq %p se_cmd %p\n",
+		caller, cmd, cmd->state, cmd->sess, cmd->seq, se_cmd);
+	printk(KERN_INFO "%s: cmd %p cdb %p\n", caller, cmd, cmd->cdb);
+	printk(KERN_INFO "%s: cmd %p lun %d\n", caller, cmd, cmd->lun);
+
+	task = T_TASK(se_cmd);
+	printk(KERN_INFO "%s: cmd %p task %p se_num %u buf %p len %u se_cmd_flags <0x%x>\n",
+	       caller, cmd, task, task->t_tasks_se_num,
+	       task->t_task_buf, se_cmd->data_length, se_cmd->se_cmd_flags);
+	if (task->t_mem_list)
+		list_for_each_entry(mem, task->t_mem_list, se_list)
+			printk(KERN_INFO "%s: cmd %p mem %p page %p "
+			       "len 0x%x off 0x%x\n",
+			       caller, cmd, mem,
+			       mem->se_page, mem->se_len, mem->se_off);
+	sp = cmd->seq;
+	if (sp) {
+		ep = fc_seq_exch(sp);
+		printk(KERN_INFO "%s: cmd %p sid %x did %x "
+			"ox_id %x rx_id %x seq_id %x e_stat %x\n",
+			caller, cmd, ep->sid, ep->did, ep->oxid, ep->rxid,
+			sp->id, ep->esb_stat);
+	}
+	if (cmd->cdb)	/* left NULL for task management requests */
+		print_hex_dump(KERN_INFO, "ft_dump_cmd ", DUMP_PREFIX_NONE,
+			16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0);
+}
+
+/*
+ * Get LUN from the FCP_CMND LUN field (fc_lun), not from the CDB.
+ */
+static int ft_get_lun_for_cmd(struct ft_cmd *cmd, u8 *lunp)
+{
+	u64 lun;
+
+	lun = lunp[1];
+	switch (lunp[0] >> 6) {
+	case 0:
+		break;
+	case 1:
+		lun |= (lunp[0] & 0x3f) << 8;
+		break;
+	default:
+		return -1;
+	}
+	if (lun >= TRANSPORT_MAX_LUNS_PER_TPG)
+		return -1;
+	cmd->lun = lun;
+	return transport_get_lun_for_cmd(&cmd->se_cmd, NULL, lun);
+}
+
+static void ft_queue_cmd(struct ft_sess *sess, struct ft_cmd *cmd)
+{
+	struct se_queue_obj *qobj = &sess->tport->tpg->qobj;
+	unsigned long flags;
+
+	/* Count under the lock, as ft_dequeue_cmd() does, so it can't go < 0 */
+	spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
+	list_add_tail(&cmd->se_req.qr_list, &qobj->qobj_list);
+	atomic_inc(&qobj->queue_cnt);
+	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
+	wake_up_interruptible(&qobj->thread_wq);
+}
+
+static struct ft_cmd *ft_dequeue_cmd(struct se_queue_obj *qobj)
+{
+	unsigned long flags;
+	struct se_queue_req *qr;
+
+	spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
+	if (list_empty(&qobj->qobj_list)) {
+		spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
+		return NULL;
+	}
+	qr = list_first_entry(&qobj->qobj_list, struct se_queue_req, qr_list);
+	list_del(&qr->qr_list);
+	atomic_dec(&qobj->queue_cnt);
+	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
+	return container_of(qr, struct ft_cmd, se_req);
+}
+
+static void ft_free_cmd(struct ft_cmd *cmd)
+{
+	struct fc_frame *fp;
+	struct fc_lport *lport;
+
+	if (!cmd)
+		return;
+	fp = cmd->req_frame;
+	lport = fr_dev(fp);
+	if (fr_seq(fp))
+		lport->tt.seq_release(fr_seq(fp));
+	fc_frame_free(fp);
+	ft_sess_put(cmd->sess);	/* undo get from lookup at recv */
+	kfree(cmd);
+}
+
+void ft_release_cmd(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+
+	ft_free_cmd(cmd);
+}
+
+void ft_check_stop_free(struct se_cmd *se_cmd)
+{
+	transport_generic_free_cmd(se_cmd, 0, 1, 0);
+}
+
+/*
+ * Send response.
+ */
+int ft_queue_status(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+	struct fc_frame *fp;
+	struct fcp_resp_with_ext *fcp;
+	struct fc_lport *lport;
+	struct fc_exch *ep;
+	size_t len;
+
+	ft_dump_cmd(cmd, __func__);
+	ep = fc_seq_exch(cmd->seq);
+	lport = ep->lp;
+	len = sizeof(*fcp) + se_cmd->scsi_sense_length;
+	fp = fc_frame_alloc(lport, len);
+	if (!fp) {
+		/* XXX shouldn't just drop it - requeue and retry? */
+		return 0;
+	}
+	fcp = fc_frame_payload_get(fp, len);
+	memset(fcp, 0, len);
+	fcp->resp.fr_status = se_cmd->scsi_status;
+
+	len = se_cmd->scsi_sense_length;
+	if (len) {
+		fcp->resp.fr_flags |= FCP_SNS_LEN_VAL;
+		fcp->ext.fr_sns_len = htonl(len);
+		memcpy((fcp + 1), se_cmd->sense_buffer, len);
+	}
+
+	/*
+	 * Test underflow and overflow with one mask.  Usually both are off.
+	 * Bidirectional commands are not handled yet.
+	 */
+	if (se_cmd->se_cmd_flags & (SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT)) {
+		if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT)
+			fcp->resp.fr_flags |= FCP_RESID_OVER;
+		else
+			fcp->resp.fr_flags |= FCP_RESID_UNDER;
+		fcp->ext.fr_resid = cpu_to_be32(se_cmd->residual_count);
+	}
+
+	/*
+	 * Send response.
+	 */
+	cmd->seq = lport->tt.seq_start_next(cmd->seq);
+	fc_fill_fc_hdr(fp, FC_RCTL_DD_CMD_STATUS, ep->did, ep->sid, FC_TYPE_FCP,
+		       FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ, 0);
+
+	lport->tt.seq_send(lport, cmd->seq, fp);
+	lport->tt.exch_done(cmd->seq);
+	return 0;
+}
+
+int ft_write_pending_status(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+
+	return cmd->write_data_len != se_cmd->data_length;
+}
+
+/*
+ * Send TX_RDY (transfer ready).
+ */
+int ft_write_pending(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+	struct fc_frame *fp;
+	struct fcp_txrdy *txrdy;
+	struct fc_lport *lport;
+	struct fc_exch *ep;
+	struct fc_frame_header *fh;
+	u32 f_ctl;
+
+	ft_dump_cmd(cmd, __func__);
+
+	ep = fc_seq_exch(cmd->seq);
+	lport = ep->lp;
+	fp = fc_frame_alloc(lport, sizeof(*txrdy));
+	if (!fp)
+		return PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES;
+
+	txrdy = fc_frame_payload_get(fp, sizeof(*txrdy));
+	memset(txrdy, 0, sizeof(*txrdy));
+	txrdy->ft_burst_len = htonl(se_cmd->data_length);
+
+	cmd->seq = lport->tt.seq_start_next(cmd->seq);
+	fc_fill_fc_hdr(fp, FC_RCTL_DD_DATA_DESC, ep->did, ep->sid, FC_TYPE_FCP,
+		       FC_FC_EX_CTX | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+
+	fh = fc_frame_header_get(fp);
+	f_ctl = ntoh24(fh->fh_f_ctl);
+
+	/* Only if it is 'Exchange Responder' */
+	if (f_ctl & FC_FC_EX_CTX) {
+		/* Target is 'exchange responder' and sending XFER_READY
+		 * to 'exchange initiator (initiator)'
+		 */
+		if ((ep->xid <= lport->lro_xid) &&
+		    (fh->fh_r_ctl == FC_RCTL_DD_DATA_DESC)) {
+			if (se_cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) {
+				/*
+				 * Map se_mem list to scatterlist, so that
+				 * DDP can be setup. DDP setup function require
+				 * scatterlist. se_mem_list is internal to
+				 * TCM/LIO target
+				 */
+				transport_do_task_sg_chain(se_cmd);
+				cmd->sg = T_TASK(se_cmd)->t_tasks_sg_chained;
+				cmd->sg_cnt =
+					T_TASK(se_cmd)->t_tasks_sg_chained_no;
+			}
+			if (cmd->sg && lport->tt.ddp_setup(lport, ep->xid,
+						    cmd->sg, cmd->sg_cnt))
+				cmd->was_ddp_setup = 1;
+		}
+	}
+	lport->tt.seq_send(lport, cmd->seq, fp);
+	return 0;
+}
+
+u32 ft_get_task_tag(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+
+	return fc_seq_exch(cmd->seq)->rxid;
+}
+
+int ft_get_cmd_state(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+
+	return cmd->state;
+}
+
+int ft_is_state_remove(struct se_cmd *se_cmd)
+{
+	return 0;	/* XXX TBD */
+}
+
+void ft_new_cmd_failure(struct se_cmd *se_cmd)
+{
+	/* XXX TBD */
+	printk(KERN_INFO "%s: se_cmd %p\n", __func__, se_cmd);
+}
+
+/*
+ * FC sequence response handler for follow-on sequences (data) and aborts.
+ */
+static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg)
+{
+	struct ft_cmd *cmd = arg;
+	struct fc_frame_header *fh;
+
+	if (IS_ERR(fp)) {
+		/* XXX need to find cmd if queued */
+		cmd->se_cmd.t_state = TRANSPORT_REMOVE;
+		cmd->seq = NULL;
+		transport_generic_free_cmd(&cmd->se_cmd, 0, 1, 0);
+		return;
+	}
+
+	fh = fc_frame_header_get(fp);
+
+	switch (fh->fh_r_ctl) {
+	case FC_RCTL_DD_SOL_DATA:	/* write data */
+		ft_recv_write_data(cmd, fp);
+		break;
+	case FC_RCTL_DD_UNSOL_CTL:	/* command */
+	case FC_RCTL_DD_SOL_CTL:	/* transfer ready */
+	case FC_RCTL_DD_DATA_DESC:	/* transfer ready */
+	default:
+		printk(KERN_INFO "%s: unhandled frame r_ctl %x\n",
+		       __func__, fh->fh_r_ctl);
+		fc_frame_free(fp);
+		transport_generic_free_cmd(&cmd->se_cmd, 0, 1, 0);
+		break;
+	}
+}
+
+/*
+ * Send a FCP response including SCSI status and optional FCP rsp_code.
+ * status is SAM_STAT_GOOD (zero) iff code is valid.
+ * This is used in error cases, such as allocation failures.
+ */
+static void ft_send_resp_status(struct fc_lport *lport,
+				const struct fc_frame *rx_fp,
+				u32 status, enum fcp_resp_rsp_codes code)
+{
+	struct fc_frame *fp;
+	struct fc_seq *sp;
+	const struct fc_frame_header *fh;
+	size_t len;
+	struct fcp_resp_with_ext *fcp;
+	struct fcp_resp_rsp_info *info;
+
+	fh = fc_frame_header_get(rx_fp);
+	FT_IO_DBG("FCP error response: did %x oxid %x status %x code %x\n",
+		  ntoh24(fh->fh_s_id), ntohs(fh->fh_ox_id), status, code);
+	len = sizeof(*fcp);
+	if (status == SAM_STAT_GOOD)
+		len += sizeof(*info);
+	fp = fc_frame_alloc(lport, len);
+	if (!fp)
+		return;
+	fcp = fc_frame_payload_get(fp, len);
+	memset(fcp, 0, len);
+	fcp->resp.fr_status = status;
+	if (status == SAM_STAT_GOOD) {
+		fcp->ext.fr_rsp_len = htonl(sizeof(*info));
+		fcp->resp.fr_flags |= FCP_RSP_LEN_VAL;
+		info = (struct fcp_resp_rsp_info *)(fcp + 1);
+		info->rsp_code = code;
+	}
+
+	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_DD_CMD_STATUS, 0);
+	sp = fr_seq(fp);
+	if (sp)
+		lport->tt.seq_send(lport, sp, fp);
+	else
+		lport->tt.frame_send(lport, fp);
+}
+
+/*
+ * Send error or task management response.
+ * Always frees the cmd and associated state.
+ */
+static void ft_send_resp_code(struct ft_cmd *cmd, enum fcp_resp_rsp_codes code)
+{
+	ft_send_resp_status(cmd->sess->tport->lport,
+			    cmd->req_frame, SAM_STAT_GOOD, code);
+	ft_free_cmd(cmd);
+}
+
+/*
+ * Handle Task Management Request.
+ */
+static void ft_send_tm(struct ft_cmd *cmd)
+{
+	struct se_tmr_req *tmr;
+	struct fcp_cmnd *fcp;
+	u8 tm_func;
+
+	fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp));
+
+	switch (fcp->fc_tm_flags) {
+	case FCP_TMF_LUN_RESET:
+		tm_func = TMR_LUN_RESET;
+		if (ft_get_lun_for_cmd(cmd, fcp->fc_lun) < 0) {
+			ft_dump_cmd(cmd, __func__);
+			transport_send_check_condition_and_sense(&cmd->se_cmd,
+				cmd->se_cmd.scsi_sense_reason, 0);
+			ft_sess_put(cmd->sess);
+			return;
+		}
+		break;
+	case FCP_TMF_TGT_RESET:
+		tm_func = TMR_TARGET_WARM_RESET;
+		break;
+	case FCP_TMF_CLR_TASK_SET:
+		tm_func = TMR_CLEAR_TASK_SET;
+		break;
+	case FCP_TMF_ABT_TASK_SET:
+		tm_func = TMR_ABORT_TASK_SET;
+		break;
+	case FCP_TMF_CLR_ACA:
+		tm_func = TMR_CLEAR_ACA;
+		break;
+	default:
+		/*
+		 * FCP4r01 indicates having a combination of
+		 * tm_flags set is invalid.
+		 */
+		FT_TM_DBG("invalid FCP tm_flags %x\n", fcp->fc_tm_flags);
+		ft_send_resp_code(cmd, FCP_CMND_FIELDS_INVALID);
+		return;
+	}
+
+	FT_TM_DBG("alloc tm cmd fn %d\n", tm_func);
+	tmr = core_tmr_alloc_req(&cmd->se_cmd, cmd, tm_func);
+	if (!tmr) {
+		FT_TM_DBG("alloc failed\n");
+		ft_send_resp_code(cmd, FCP_TMF_FAILED);
+		return;
+	}
+	cmd->se_cmd.se_tmr_req = tmr;
+	transport_generic_handle_tmr(&cmd->se_cmd);
+}
+
+/*
+ * Send status from completed task management request.
+ */
+int ft_queue_tm_resp(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+	struct se_tmr_req *tmr = se_cmd->se_tmr_req;
+	enum fcp_resp_rsp_codes code;
+
+	switch (tmr->response) {
+	case TMR_FUNCTION_COMPLETE:
+		code = FCP_TMF_CMPL;
+		break;
+	case TMR_LUN_DOES_NOT_EXIST:
+		code = FCP_TMF_INVALID_LUN;
+		break;
+	case TMR_FUNCTION_REJECTED:
+		code = FCP_TMF_REJECTED;
+		break;
+	case TMR_TASK_DOES_NOT_EXIST:
+	case TMR_TASK_STILL_ALLEGIANT:
+	case TMR_TASK_FAILOVER_NOT_SUPPORTED:
+	case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED:
+	case TMR_FUNCTION_AUTHORIZATION_FAILED:
+	default:
+		code = FCP_TMF_FAILED;
+		break;
+	}
+	FT_TM_DBG("tmr fn %d resp %d fcp code %d\n",
+		  tmr->function, tmr->response, code);
+	ft_send_resp_code(cmd, code);
+	return 0;
+}
+
+/*
+ * Handle incoming FCP command.
+ */
+static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
+{
+	struct ft_cmd *cmd;
+	struct fc_lport *lport = sess->tport->lport;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
+	if (!cmd)
+		goto busy;
+	cmd->sess = sess;
+	cmd->seq = lport->tt.seq_assign(lport, fp);
+	if (!cmd->seq) {
+		kfree(cmd);
+		goto busy;
+	}
+	cmd->req_frame = fp;		/* hold frame during cmd */
+	ft_queue_cmd(sess, cmd);
+	return;
+
+busy:
+	FT_IO_DBG("cmd or seq allocation failure - sending BUSY\n");
+	ft_send_resp_status(lport, fp, SAM_STAT_BUSY, 0);
+	fc_frame_free(fp);
+	ft_sess_put(sess);		/* undo get from lookup */
+}
+
+
+/*
+ * Handle incoming FCP frame.
+ * Caller has verified that the frame is type FCP.
+ */
+void ft_recv_req(struct ft_sess *sess, struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+
+	switch (fh->fh_r_ctl) {
+	case FC_RCTL_DD_UNSOL_CMD:	/* command */
+		ft_recv_cmd(sess, fp);
+		break;
+	case FC_RCTL_DD_SOL_DATA:	/* write data */
+	case FC_RCTL_DD_UNSOL_CTL:
+	case FC_RCTL_DD_SOL_CTL:
+	case FC_RCTL_DD_DATA_DESC:	/* transfer ready */
+	case FC_RCTL_ELS4_REQ:		/* SRR, perhaps */
+	default:
+		printk(KERN_INFO "%s: unhandled frame r_ctl %x\n",
+		       __func__, fh->fh_r_ctl);
+		fc_frame_free(fp);
+		ft_sess_put(sess);	/* undo get from lookup */
+		break;
+	}
+}
+
+/*
+ * Send new command to target.
+ */
+static void ft_send_cmd(struct ft_cmd *cmd)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(cmd->req_frame);
+	struct se_cmd *se_cmd;
+	struct fcp_cmnd *fcp;
+	int data_dir;
+	u32 data_len;
+	int task_attr;
+	int ret;
+
+	fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp));
+	if (!fcp)
+		goto err;
+
+	if (fcp->fc_flags & FCP_CFL_LEN_MASK)
+		goto err;		/* not handling longer CDBs yet */
+
+	if (fcp->fc_tm_flags) {
+		task_attr = FCP_PTA_SIMPLE;
+		data_dir = DMA_NONE;
+		data_len = 0;
+	} else {
+		switch (fcp->fc_flags & (FCP_CFL_RDDATA | FCP_CFL_WRDATA)) {
+		case 0:
+			data_dir = DMA_NONE;
+			break;
+		case FCP_CFL_RDDATA:
+			data_dir = DMA_FROM_DEVICE;
+			break;
+		case FCP_CFL_WRDATA:
+			data_dir = DMA_TO_DEVICE;
+			break;
+		case FCP_CFL_WRDATA | FCP_CFL_RDDATA:
+			goto err;	/* TBD not supported by tcm_fc yet */
+		}
+
+		/* FCP_PTA_ maps 1:1 to TASK_ATTR_ */
+		task_attr = fcp->fc_pri_ta & FCP_PTA_MASK;
+		data_len = ntohl(fcp->fc_dl);
+		cmd->cdb = fcp->fc_cdb;
+	}
+
+	se_cmd = &cmd->se_cmd;
+	/*
+	 * Initialize struct se_cmd descriptor from target_core_mod
+	 * infrastructure
+	 */
+	transport_init_se_cmd(se_cmd, &ft_configfs->tf_ops, cmd->sess->se_sess,
+			      data_len, data_dir, task_attr,
+			      &cmd->ft_sense_buffer[0]);
+	/*
+	 * Check for FCP task management flags
+	 */
+	if (fcp->fc_tm_flags) {
+		ft_send_tm(cmd);
+		return;
+	}
+
+	fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd);
+
+	ret = ft_get_lun_for_cmd(cmd, fcp->fc_lun);
+	if (ret < 0) {
+		ft_dump_cmd(cmd, __func__);
+		transport_send_check_condition_and_sense(&cmd->se_cmd,
+			cmd->se_cmd.scsi_sense_reason, 0);
+		return;
+	}
+
+	ret = transport_generic_allocate_tasks(se_cmd, cmd->cdb);
+
+	FT_IO_DBG("r_ctl %x alloc task ret %d\n", fh->fh_r_ctl, ret);
+	ft_dump_cmd(cmd, __func__);
+
+	if (ret == -1) {
+		transport_send_check_condition_and_sense(se_cmd,
+				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
+		transport_generic_free_cmd(se_cmd, 0, 1, 0);
+		return;
+	}
+	if (ret == -2) {
+		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
+			ft_queue_status(se_cmd);
+		else
+			transport_send_check_condition_and_sense(se_cmd,
+					se_cmd->scsi_sense_reason, 0);
+		transport_generic_free_cmd(se_cmd, 0, 1, 0);
+		return;
+	}
+	transport_generic_handle_cdb(se_cmd);
+	return;
+
+err:
+	ft_send_resp_code(cmd, FCP_CMND_FIELDS_INVALID);
+	return;
+}
+
+/*
+ * Handle request in the command thread.
+ */
+static void ft_exec_req(struct ft_cmd *cmd)
+{
+	FT_IO_DBG("cmd state %x\n", cmd->state);
+	switch (cmd->state) {
+	case FC_CMD_ST_NEW:
+		ft_send_cmd(cmd);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Processing thread.
+ * Currently one thread per tpg.
+ */
+int ft_thread(void *arg)
+{
+	struct ft_tpg *tpg = arg;
+	struct se_queue_obj *qobj = &tpg->qobj;
+	struct ft_cmd *cmd;
+	int ret;
+
+	set_user_nice(current, -20);
+
+	while (!kthread_should_stop()) {
+		ret = wait_event_interruptible(qobj->thread_wq,
+			atomic_read(&qobj->queue_cnt) || kthread_should_stop());
+		if (ret < 0 || kthread_should_stop())
+			goto out;
+		cmd = ft_dequeue_cmd(qobj);
+		if (cmd)
+			ft_exec_req(cmd);
+	}
+
+out:
+	return 0;
+}
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
new file mode 100644
index 000000000000..fcdbbffe88cc
--- /dev/null
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -0,0 +1,677 @@
+/*******************************************************************************
+ * Filename:  tfc_conf.c
+ *
+ * This file contains the configfs implementation for TCM_fc fabric node.
+ * Based on tcm_loop_configfs.c
+ *
+ * Copyright (c) 2010 Cisco Systems, Inc.
+ * Copyright (c) 2009,2010 Rising Tide, Inc.
+ * Copyright (c) 2009,2010 Linux-iSCSI.org
+ *
+ * Copyright (c) 2009,2010 Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ****************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/version.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/configfs.h>
+#include <linux/ctype.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libfc.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_base.h>
+#include <target/configfs_macros.h>
+
+#include "tcm_fc.h"
+
+struct target_fabric_configfs *ft_configfs;
+
+LIST_HEAD(ft_lport_list);
+DEFINE_MUTEX(ft_lport_lock);
+
+unsigned int ft_debug_logging;	/* bit mask, tested against FT_DEBUG_IO etc. */
+module_param_named(debug_logging, ft_debug_logging, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels");
+
+/*
+ * Parse WWN.  Returns the number of characters consumed, or -1 on error.
+ * If strict, we require exactly what ft_format_wwn() generates (eight
+ * lower-case hex bytes, colon separated) so name and wwn map one-to-one.
+ */
+static ssize_t ft_parse_wwn(const char *name, u64 *wwn, int strict)
+{
+	const char *cp;
+	char c;
+	u32 nibble;
+	u32 byte = 0, pos = 0;
+	u32 err;
+
+	*wwn = 0;
+	for (cp = name; cp < &name[FT_NAMELEN - 1]; cp++) {
+		c = *cp;
+		if (c == '\n' && cp[1] == '\0')
+			continue;
+		if (strict && pos++ == 2 && byte++ < 7) {
+			pos = 0;
+			if (c == ':')
+				continue;
+			err = 1;
+			goto fail;
+		}
+		if (c == '\0') {
+			err = 2;
+			if (strict && byte != 8)
+				goto fail;
+			return cp - name;
+		}
+		err = 3;
+		if (strict && byte == 8)	/* junk after the eighth byte */
+			goto fail;
+		if (isdigit(c))
+			nibble = c - '0';
+		else if (isxdigit(c) && (islower(c) || !strict))
+			nibble = tolower(c) - 'a' + 10;
+		else
+			goto fail;
+		*wwn = (*wwn << 4) | nibble;
+	}
+	err = 4;
+fail:
+	FT_CONF_DBG("err %u len %td pos %u byte %u\n",
+		    err, cp - name, pos, byte);
+	return -1;
+}
+
+ssize_t ft_format_wwn(char *buf, size_t len, u64 wwn)
+{
+	u8 b[8];
+
+	put_unaligned_be64(wwn, b);
+	return snprintf(buf, len,
+		 "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x",
+		 b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
+}
+
+static ssize_t ft_wwn_show(void *arg, char *buf)
+{
+	u64 *wwn = arg;
+	ssize_t len;
+
+	len = ft_format_wwn(buf, PAGE_SIZE - 2, *wwn);
+	buf[len++] = '\n';
+	return len;
+}
+
+/* Store a WWN parsed (non-strict) from buf; -EINVAL if it is malformed. */
+static ssize_t ft_wwn_store(void *arg, const char *buf, size_t len)
+{
+	u64 wwn;
+	ssize_t ret = ft_parse_wwn(buf, &wwn, 0);
+
+	if (ret > 0)
+		*(u64 *)arg = wwn;
+	return ret < 0 ? -EINVAL : ret;
+}
+
+/*
+ * ACL auth ops.
+ */
+
+static ssize_t ft_nacl_show_port_name(
+	struct se_node_acl *se_nacl,
+	char *page)
+{
+	struct ft_node_acl *acl = container_of(se_nacl,
+			struct ft_node_acl, se_node_acl);
+
+	return ft_wwn_show(&acl->node_auth.port_name, page);
+}
+
+static ssize_t ft_nacl_store_port_name(
+	struct se_node_acl *se_nacl,
+	const char *page,
+	size_t count)
+{
+	struct ft_node_acl *acl = container_of(se_nacl,
+			struct ft_node_acl, se_node_acl);
+
+	return ft_wwn_store(&acl->node_auth.port_name, page, count);
+}
+
+TF_NACL_BASE_ATTR(ft, port_name, S_IRUGO | S_IWUSR);
+
+static ssize_t ft_nacl_show_node_name(
+	struct se_node_acl *se_nacl,
+	char *page)
+{
+	struct ft_node_acl *acl = container_of(se_nacl,
+			struct ft_node_acl, se_node_acl);
+
+	return ft_wwn_show(&acl->node_auth.node_name, page);
+}
+
+static ssize_t ft_nacl_store_node_name(
+	struct se_node_acl *se_nacl,
+	const char *page,
+	size_t count)
+{
+	struct ft_node_acl *acl = container_of(se_nacl,
+			struct ft_node_acl, se_node_acl);
+
+	return ft_wwn_store(&acl->node_auth.node_name, page, count);
+}
+
+TF_NACL_BASE_ATTR(ft, node_name, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *ft_nacl_base_attrs[] = {
+	&ft_nacl_port_name.attr,
+	&ft_nacl_node_name.attr,
+	NULL,
+};
+
+/*
+ * ACL ops.
+ */
+
+/*
+ * Add ACL for an initiator.  The ACL is named arbitrarily.
+ * The port_name and/or node_name are attributes.
+ */
+static struct se_node_acl *ft_add_acl(
+	struct se_portal_group *se_tpg,
+	struct config_group *group,
+	const char *name)
+{
+	struct ft_node_acl *acl;
+	struct ft_tpg *tpg;
+	u64 wwpn;
+	u32 q_depth;
+
+	FT_CONF_DBG("add acl %s\n", name);
+	tpg = container_of(se_tpg, struct ft_tpg, se_tpg);
+
+	if (ft_parse_wwn(name, &wwpn, 1) < 0)
+		return ERR_PTR(-EINVAL);
+
+	acl = kzalloc(sizeof(struct ft_node_acl), GFP_KERNEL);
+	if (!(acl))
+		return ERR_PTR(-ENOMEM);
+	acl->node_auth.port_name = wwpn;
+
+	q_depth = 32;		/* XXX bogus default - get from tpg? */
+	return core_tpg_add_initiator_node_acl(&tpg->se_tpg,
+				&acl->se_node_acl, name, q_depth);
+}
+
+static void ft_del_acl(struct se_node_acl *se_acl)
+{
+	struct se_portal_group *se_tpg = se_acl->se_tpg;
+	struct ft_tpg *tpg;
+	struct ft_node_acl *acl = container_of(se_acl,
+				struct ft_node_acl, se_node_acl);
+
+	FT_CONF_DBG("del acl %s\n",
+		config_item_name(&se_acl->acl_group.cg_item));
+
+	tpg = container_of(se_tpg, struct ft_tpg, se_tpg);
+	FT_CONF_DBG("del acl %p se_acl %p tpg %p se_tpg %p\n",
+		    acl, se_acl, tpg, &tpg->se_tpg);
+
+	core_tpg_del_initiator_node_acl(&tpg->se_tpg, se_acl, 1);
+	kfree(acl);
+}
+
+struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
+{
+	struct ft_node_acl *found = NULL;
+	struct ft_node_acl *acl;
+	struct se_portal_group *se_tpg = &tpg->se_tpg;
+	struct se_node_acl *se_acl;
+
+	spin_lock_bh(&se_tpg->acl_node_lock);
+	list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) {
+		acl = container_of(se_acl, struct ft_node_acl, se_node_acl);
+		FT_CONF_DBG("acl %p port_name %llx\n",
+			acl, (unsigned long long)acl->node_auth.port_name);
+		if (acl->node_auth.port_name == rdata->ids.port_name ||
+		    acl->node_auth.node_name == rdata->ids.node_name) {
+			FT_CONF_DBG("acl %p port_name %llx matched\n", acl,
+				    (unsigned long long)rdata->ids.port_name);
+			found = acl;
+			/* XXX need to hold onto ACL */
+			break;
+		}
+	}
+	spin_unlock_bh(&se_tpg->acl_node_lock);
+	return found;
+}
+
+struct se_node_acl *ft_tpg_alloc_fabric_acl(struct se_portal_group *se_tpg)
+{
+	struct ft_node_acl *acl;
+
+	acl = kzalloc(sizeof(*acl), GFP_KERNEL);
+	if (!(acl)) {
+		printk(KERN_ERR "Unable to allocate struct ft_node_acl\n");
+		return NULL;
+	}
+	FT_CONF_DBG("acl %p\n", acl);
+	return &acl->se_node_acl;
+}
+
+static void ft_tpg_release_fabric_acl(struct se_portal_group *se_tpg,
+				      struct se_node_acl *se_acl)
+{
+	struct ft_node_acl *acl = container_of(se_acl,
+				struct ft_node_acl, se_node_acl);
+
+	FT_CONF_DBG("acl %p\n", acl);	/* no KERN_ level, as elsewhere */
+	kfree(acl);
+}
+
+/*
+ * local_port port_group (tpg) ops.
+ */
+/* Add a tpg named "tpgt_<index>" under an lport and start its cmd thread. */
+static struct se_portal_group *ft_add_tpg(
+	struct se_wwn *wwn,
+	struct config_group *group,
+	const char *name)
+{
+	struct ft_lport_acl *lacl;
+	struct ft_tpg *tpg;
+	unsigned long index;
+	int ret;
+
+	FT_CONF_DBG("tcm_fc: add tpg %s\n", name);
+
+	/* Name must be "tpgt_" followed by the index. */
+	if (strstr(name, "tpgt_") != name ||
+	    strict_strtoul(name + 5, 10, &index) || index > UINT_MAX)
+		return NULL;
+
+	lacl = container_of(wwn, struct ft_lport_acl, fc_lport_wwn);
+	tpg = kzalloc(sizeof(*tpg), GFP_KERNEL);
+	if (!tpg)
+		return NULL;
+	tpg->index = index;
+	tpg->lport_acl = lacl;
+	INIT_LIST_HEAD(&tpg->lun_list);
+	transport_init_queue_obj(&tpg->qobj);
+
+	ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg,
+				(void *)tpg, TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret < 0) {
+		kfree(tpg);
+		return NULL;
+	}
+
+	tpg->thread = kthread_run(ft_thread, tpg, "ft_tpg%lu", index);
+	if (IS_ERR(tpg->thread)) {
+		/* Unregister from the core before freeing the tpg. */
+		core_tpg_deregister(&tpg->se_tpg);
+		kfree(tpg);
+		return NULL;
+	}
+
+	mutex_lock(&ft_lport_lock);
+	list_add_tail(&tpg->list, &lacl->tpg_list);
+	mutex_unlock(&ft_lport_lock);
+
+	return &tpg->se_tpg;
+}
+
+static void ft_del_tpg(struct se_portal_group *se_tpg)
+{
+	struct ft_tpg *tpg = container_of(se_tpg, struct ft_tpg, se_tpg);
+
+	FT_CONF_DBG("del tpg %s\n",
+		    config_item_name(&tpg->se_tpg.tpg_group.cg_item));
+
+	kthread_stop(tpg->thread);
+
+	/* Wait for sessions to be freed thru RCU, for BUG_ON below */
+	synchronize_rcu();
+
+	mutex_lock(&ft_lport_lock);
+	list_del(&tpg->list);
+	if (tpg->tport) {
+		tpg->tport->tpg = NULL;
+		tpg->tport = NULL;
+	}
+	mutex_unlock(&ft_lport_lock);
+
+	core_tpg_deregister(se_tpg);
+	kfree(tpg);
+}
+
+/*
+ * Verify that an lport is configured to use the tcm_fc module, and return
+ * the target port group that should be used.
+ *
+ * The caller holds ft_lport_lock.
+ */
+struct ft_tpg *ft_lport_find_tpg(struct fc_lport *lport)
+{
+	struct ft_lport_acl *lacl;
+	struct ft_tpg *tpg;
+
+	list_for_each_entry(lacl, &ft_lport_list, list) {
+		if (lacl->wwpn == lport->wwpn) {
+			list_for_each_entry(tpg, &lacl->tpg_list, list)
+				return tpg; /* XXX for now return first entry */
+			return NULL;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * target config instance ops.
+ */
+
+/*
+ * Add lport to allowed config.
+ * The name is the WWPN in lower-case ASCII, colon-separated bytes.
+ */
+static struct se_wwn *ft_add_lport(
+	struct target_fabric_configfs *tf,
+	struct config_group *group,
+	const char *name)
+{
+	struct ft_lport_acl *lacl;
+	struct ft_lport_acl *old_lacl;
+	u64 wwpn;
+
+	FT_CONF_DBG("add lport %s\n", name);
+	if (ft_parse_wwn(name, &wwpn, 1) < 0)
+		return NULL;
+	lacl = kzalloc(sizeof(*lacl), GFP_KERNEL);
+	if (!lacl)
+		return NULL;
+	lacl->wwpn = wwpn;
+	INIT_LIST_HEAD(&lacl->tpg_list);
+
+	mutex_lock(&ft_lport_lock);
+	list_for_each_entry(old_lacl, &ft_lport_list, list) {
+		if (old_lacl->wwpn == wwpn) {
+			mutex_unlock(&ft_lport_lock);
+			kfree(lacl);
+			return NULL;
+		}
+	}
+	list_add_tail(&lacl->list, &ft_lport_list);
+	ft_format_wwn(lacl->name, sizeof(lacl->name), wwpn);
+	mutex_unlock(&ft_lport_lock);
+
+	return &lacl->fc_lport_wwn;
+}
+
+static void ft_del_lport(struct se_wwn *wwn)
+{
+	struct ft_lport_acl *lacl = container_of(wwn,
+				struct ft_lport_acl, fc_lport_wwn);
+
+	FT_CONF_DBG("del lport %s\n",
+			config_item_name(&wwn->wwn_group.cg_item));
+	mutex_lock(&ft_lport_lock);
+	list_del(&lacl->list);
+	mutex_unlock(&ft_lport_lock);
+
+	kfree(lacl);
+}
+
+static ssize_t ft_wwn_show_attr_version(
+	struct target_fabric_configfs *tf,
+	char *page)
+{
+	return sprintf(page, "TCM FC " FT_VERSION " on %s/%s on "
+		""UTS_RELEASE"\n",  utsname()->sysname, utsname()->machine);
+}
+
+TF_WWN_ATTR_RO(ft, version);
+
+static struct configfs_attribute *ft_wwn_attrs[] = {
+	&ft_wwn_version.attr,
+	NULL,
+};
+
+static char *ft_get_fabric_name(void)
+{
+	return "fc";
+}
+
+static char *ft_get_fabric_wwn(struct se_portal_group *se_tpg)
+{
+	struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return tpg->lport_acl->name;
+}
+
+static u16 ft_get_tag(struct se_portal_group *se_tpg)
+{
+	struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	/*
+	 * This tag is used when forming SCSI Name identifier in EVPD=1 0x83
+	 * to represent the SCSI Target Port.
+	 */
+	return tpg->index;
+}
+
+static u32 ft_get_default_depth(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static int ft_check_false(struct se_portal_group *se_tpg)
+{
+	return 0;
+}
+
+static void ft_set_default_node_attr(struct se_node_acl *se_nacl)
+{
+}
+
+static u16 ft_get_fabric_sense_len(void)
+{
+	return 0;
+}
+
+static u16 ft_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_len)
+{
+	return 0;
+}
+
+static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+	struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr;
+
+	return tpg->index;
+}
+
+static u64 ft_pack_lun(unsigned int index)
+{
+	WARN_ON(index >= 256);
+	/* Caller wants this byte-swapped */
+	return cpu_to_le64((index & 0xff) << 8);
+}
+
+static struct target_core_fabric_ops ft_fabric_ops = {	/* copied into fabric->tf_ops */
+	.get_fabric_name =		ft_get_fabric_name,
+	.get_fabric_proto_ident =	fc_get_fabric_proto_ident,
+	.tpg_get_wwn =			ft_get_fabric_wwn,
+	.tpg_get_tag =			ft_get_tag,
+	.tpg_get_default_depth =	ft_get_default_depth,
+	.tpg_get_pr_transport_id =	fc_get_pr_transport_id,
+	.tpg_get_pr_transport_id_len =	fc_get_pr_transport_id_len,
+	.tpg_parse_pr_out_transport_id = fc_parse_pr_out_transport_id,
+	.tpg_check_demo_mode =		ft_check_false,	/* these four always answer 0 */
+	.tpg_check_demo_mode_cache =	ft_check_false,
+	.tpg_check_demo_mode_write_protect = ft_check_false,
+	.tpg_check_prod_mode_write_protect = ft_check_false,
+	.tpg_alloc_fabric_acl =		ft_tpg_alloc_fabric_acl,
+	.tpg_release_fabric_acl =	ft_tpg_release_fabric_acl,
+	.tpg_get_inst_index =		ft_tpg_get_inst_index,
+	.check_stop_free =		ft_check_stop_free,
+	.release_cmd_to_pool =		ft_release_cmd,	/* same handler as _direct */
+	.release_cmd_direct =		ft_release_cmd,
+	.shutdown_session =		ft_sess_shutdown,
+	.close_session =		ft_sess_close,
+	.stop_session =			ft_sess_stop,
+	.fall_back_to_erl0 =		ft_sess_set_erl0,
+	.sess_logged_in =		ft_sess_logged_in,
+	.sess_get_index =		ft_sess_get_index,
+	.sess_get_initiator_sid =	NULL,	/* hook not provided */
+	.write_pending =		ft_write_pending,
+	.write_pending_status =		ft_write_pending_status,
+	.set_default_node_attributes =	ft_set_default_node_attr,
+	.get_task_tag =			ft_get_task_tag,
+	.get_cmd_state =		ft_get_cmd_state,
+	.new_cmd_failure =		ft_new_cmd_failure,
+	.queue_data_in =		ft_queue_data_in,
+	.queue_status =			ft_queue_status,
+	.queue_tm_rsp =			ft_queue_tm_resp,
+	.get_fabric_sense_len =		ft_get_fabric_sense_len,
+	.set_fabric_sense_len =		ft_set_fabric_sense_len,
+	.is_state_remove =		ft_is_state_remove,
+	.pack_lun =			ft_pack_lun,
+	/*
+	 * Setup function pointers for generic logic in
+	 * target_core_fabric_configfs.c
+	 */
+	.fabric_make_wwn =		&ft_add_lport,
+	.fabric_drop_wwn =		&ft_del_lport,
+	.fabric_make_tpg =		&ft_add_tpg,
+	.fabric_drop_tpg =		&ft_del_tpg,
+	.fabric_post_link =		NULL,	/* link/np hooks are not provided */
+	.fabric_pre_unlink =		NULL,
+	.fabric_make_np =		NULL,
+	.fabric_drop_np =		NULL,
+	.fabric_make_nodeacl =		&ft_add_acl,
+	.fabric_drop_nodeacl =		&ft_del_acl,
+};
+
+int ft_register_configfs(void)	/* returns 0 on success, -1 on any failure */
+{
+	struct target_fabric_configfs *fabric;
+	int ret;
+
+	/*
+	 * Register the top level struct config_item_type with TCM core
+	 */
+	fabric = target_fabric_configfs_init(THIS_MODULE, "fc");
+	if (!fabric) {
+		printk(KERN_INFO "%s: target_fabric_configfs_init() failed!\n",
+		       __func__);
+		return -1;	/* NOTE(review): not an errno; ft_init() tests non-zero */
+	}
+	fabric->tf_ops = ft_fabric_ops;	/* struct copy into fabric */
+
+	/* Allowing support for task_sg_chaining */
+	fabric->tf_ops.task_sg_chaining = 1;	/* set on fabric's copy only */
+
+	/*
+	 * Setup default attribute lists for various fabric->tf_cit_tmpl
+	 */
+	TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = ft_wwn_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs =
+						    ft_nacl_base_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;
+	/*
+	 * register the fabric for use within TCM
+	 */
+	ret = target_fabric_configfs_register(fabric);
+	if (ret < 0) {
+		FT_CONF_DBG("target_fabric_configfs_register() for"
+			    " FC Target failed!\n");
+		printk(KERN_INFO
+		       "%s: target_fabric_configfs_register() failed!\n",
+		       __func__);
+		target_fabric_configfs_free(fabric);	/* undo the init above */
+		return -1;	/* ret itself is discarded */
+	}
+
+	/*
+	 * Setup our local pointer to *fabric.
+	 */
+	ft_configfs = fabric;	/* read back by ft_deregister_configfs() */
+	return 0;
+}
+
+void ft_deregister_configfs(void)
+{
+	if (!ft_configfs)	/* nothing registered, or already deregistered */
+		return;
+	target_fabric_configfs_deregister(ft_configfs);
+	ft_configfs = NULL;	/* makes a repeated call a no-op */
+}
+
+static struct notifier_block ft_notifier = {	/* hooked up in ft_init() */
+	.notifier_call = ft_lport_notify
+};
+
+static int __init ft_init(void)	/* returns 0 on success, -1 on failure */
+{
+	if (ft_register_configfs())
+		return -1;	/* NOTE(review): -1 rather than a specific errno */
+	if (fc_fc4_register_provider(FC_TYPE_FCP, &ft_prov)) {
+		ft_deregister_configfs();	/* roll back the step above */
+		return -1;
+	}
+	blocking_notifier_chain_register(&fc_lport_notifier_head, &ft_notifier);
+	fc_lport_iterate(ft_lport_add, NULL);	/* ft_lport_add() per lport */
+	return 0;
+}
+
+static void __exit ft_exit(void)	/* undoes ft_init() */
+{
+	blocking_notifier_chain_unregister(&fc_lport_notifier_head,
+					   &ft_notifier);
+	fc_fc4_deregister_provider(FC_TYPE_FCP, &ft_prov);
+	fc_lport_iterate(ft_lport_del, NULL);	/* ft_lport_del() per lport */
+	ft_deregister_configfs();
+	synchronize_rcu();	/* presumably lets pending call_rcu() frees drain */
+}
+
+#ifdef MODULE	/* NOTE(review): init/exit are only wired up when MODULE is set */
+MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION);
+MODULE_LICENSE("GPL");
+module_init(ft_init);
+module_exit(ft_exit);
+#endif /* MODULE */
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
new file mode 100644
index 000000000000..4c3c0efbe13f
--- /dev/null
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc.
+ *
+ * Portions based on tcm_loop_fabric_scsi.c and libfc/fc_fcp.c
+ *
+ * Copyright (c) 2007 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2008 Mike Christie
+ * Copyright (c) 2009 Rising Tide, Inc.
+ * Copyright (c) 2009 Linux-iSCSI.org
+ * Copyright (c) 2009 Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* XXX TBD some includes may be extraneous */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/version.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/configfs.h>
+#include <linux/ctype.h>
+#include <linux/hash.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_base.h>
+#include <target/configfs_macros.h>
+
+#include "tcm_fc.h"
+
+/*
+ * Deliver read data back to initiator as a sequence of FC data frames, then
+ * queue status. Returns -ENOMEM if a frame cannot be allocated.
+ */
+int ft_queue_data_in(struct se_cmd *se_cmd)
+{
+	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
+	struct se_transport_task *task;
+	struct fc_frame *fp = NULL;
+	struct fc_exch *ep;
+	struct fc_lport *lport;
+	struct se_mem *mem;
+	size_t remaining;
+	u32 f_ctl = FC_FC_EX_CTX | FC_FC_REL_OFF;
+	u32 mem_off;
+	u32 fh_off = 0;
+	u32 frame_off = 0;
+	size_t frame_len = 0;
+	size_t mem_len;
+	size_t tlen;
+	size_t off_in_page;
+	struct page *page;
+	int use_sg;
+	int error;
+	void *page_addr;
+	void *from;
+	void *to = NULL;
+
+	ep = fc_seq_exch(cmd->seq);
+	lport = ep->lp;
+	cmd->seq = lport->tt.seq_start_next(cmd->seq);
+
+	task = T_TASK(se_cmd);
+	BUG_ON(!task);
+	remaining = se_cmd->data_length;	/* bytes still to be sent */
+
+	/*
+	 * Setup to use first mem list entry if any.
+	 */
+	if (task->t_tasks_se_num) {
+		mem = list_first_entry(task->t_mem_list,
+			 struct se_mem, se_list);
+		mem_len = mem->se_len;
+		mem_off = mem->se_off;
+		page = mem->se_page;
+	} else {
+		mem = NULL;	/* no mem list: data comes from task->t_task_buf */
+		mem_len = remaining;
+		mem_off = 0;
+		page = NULL;
+	}
+
+	/* no scatter/gather in skb for odd word length due to fc_seq_send() */
+	use_sg = !(remaining % 4);
+
+	while (remaining) {
+		if (!mem_len) {	/* current segment used up: step to the next */
+			BUG_ON(!mem);
+			mem = list_entry(mem->se_list.next,
+				struct se_mem, se_list);
+			mem_len = min((size_t)mem->se_len, remaining);
+			mem_off = mem->se_off;
+			page = mem->se_page;
+		}
+		if (!frame_len) {	/* previous frame complete: start a new one */
+			/*
+			 * If the lport supports sequence offload (Large Send
+			 * Offload, LSO), allow 'frame_len' to be as big as
+			 * 'lso_max'; otherwise use the session's max_frame.
+			 */
+			frame_len = (lport->seq_offload) ? lport->lso_max :
+							  cmd->sess->max_frame;
+			frame_len = min(frame_len, remaining);
+			fp = fc_frame_alloc(lport, use_sg ? 0 : frame_len);
+			if (!fp)
+				return -ENOMEM;
+			to = fc_frame_payload_get(fp, 0);
+			fh_off = frame_off;	/* relative offset for this frame */
+			frame_off += frame_len;
+			/*
+			 * Setup the frame's max payload which is used by base
+			 * driver to indicate HW about max frame size, so that
+			 * HW can do fragmentation appropriately based on
+			 * "gso_max_size" of underline netdev.
+			 */
+			fr_max_payload(fp) = cmd->sess->max_frame;
+		}
+		tlen = min(mem_len, frame_len);
+
+		if (use_sg) {	/* attach the page to the skb instead of copying */
+			if (!mem) {
+				BUG_ON(!task->t_task_buf);
+				page_addr = task->t_task_buf + mem_off;
+				/*
+				 * In this case, offset is 'offset_in_page' of
+				 * (t_task_buf + mem_off) instead of 'mem_off'.
+				 */
+				off_in_page = offset_in_page(page_addr);
+				page = virt_to_page(page_addr);
+				tlen = min(tlen, PAGE_SIZE - off_in_page);
+			} else
+				off_in_page = mem_off;
+			BUG_ON(!page);
+			get_page(page);
+			skb_fill_page_desc(fp_skb(fp),
+					   skb_shinfo(fp_skb(fp))->nr_frags,
+					   page, off_in_page, tlen);
+			fr_len(fp) += tlen;
+			fp_skb(fp)->data_len += tlen;
+			fp_skb(fp)->truesize +=
+					PAGE_SIZE << compound_order(page);
+		} else if (mem) {	/* copy from a mapped page, one page at most */
+			BUG_ON(!page);
+			from = kmap_atomic(page + (mem_off >> PAGE_SHIFT),
+					   KM_SOFTIRQ0);
+			page_addr = from;
+			from += mem_off & ~PAGE_MASK;
+			tlen = min(tlen, (size_t)(PAGE_SIZE -
+						(mem_off & ~PAGE_MASK)));
+			memcpy(to, from, tlen);
+			kunmap_atomic(page_addr, KM_SOFTIRQ0);
+			to += tlen;
+		} else {	/* copy straight out of the linear task buffer */
+			from = task->t_task_buf + mem_off;
+			memcpy(to, from, tlen);
+			to += tlen;
+		}
+
+		mem_off += tlen;
+		mem_len -= tlen;
+		frame_len -= tlen;
+		remaining -= tlen;
+
+		/* NOTE(review): if the frag limit ends a frame early, frame_len stays non-zero and fp is reused */
+		if (frame_len &&
+		    (skb_shinfo(fp_skb(fp))->nr_frags < FC_FRAME_SG_LEN))
+			continue;
+		if (!remaining)
+			f_ctl |= FC_FC_END_SEQ;
+		fc_fill_fc_hdr(fp, FC_RCTL_DD_SOL_DATA, ep->did, ep->sid,
+			       FC_TYPE_FCP, f_ctl, fh_off);
+		error = lport->tt.seq_send(lport, cmd->seq, fp);
+		if (error) {
+			/* XXX For now, initiator will retry */
+			if (printk_ratelimit())
+				printk(KERN_ERR "%s: Failed to send frame %p, "
+						"xid <0x%x>, remaining <0x%zx>, "
+						"lso_max <0x%x>\n",
+						__func__, fp, ep->xid,
+						remaining, lport->lso_max);
+		}
+	}
+	return ft_queue_status(se_cmd);
+}
+
+/*
+ * Receive write data frame: copy its payload into the command's buffers.
+ */
+void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
+{
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	struct fc_seq *seq = cmd->seq;
+	struct fc_exch *ep;
+	struct fc_lport *lport;
+	struct se_transport_task *task;
+	struct fc_frame_header *fh;
+	struct se_mem *mem;
+	u32 mem_off;
+	u32 rel_off;
+	size_t frame_len;
+	size_t mem_len;
+	size_t tlen;
+	struct page *page;
+	void *page_addr;
+	void *from;
+	void *to;
+	u32 f_ctl;
+	void *buf;
+
+	task = T_TASK(se_cmd);
+	BUG_ON(!task);
+
+	fh = fc_frame_header_get(fp);
+	if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF))
+		goto drop;	/* frames without a relative offset are dropped */
+
+	/*
+	 * With DDP (Large Rx offload) set up, not even a single byte of
+	 * payload is expected here: the payload is expected to be placed
+	 * directly into the buffers, hence BUG_ON if a payload is
+	 * present.
+	 */
+	buf = fc_frame_payload_get(fp, 1);
+	if (cmd->was_ddp_setup && buf) {
+		printk(KERN_INFO "%s: When DDP was setup, not expected to "
+				 "receive frame with payload, Payload shall be "
+				 "copied directly to buffer instead of coming "
+				 "via. legacy receive queues\n", __func__);
+		BUG_ON(buf);
+	}
+
+	/*
+	 * If ft_cmd indicated 'ddp_setup', in that case only the last frame
+	 * should come with 'TSI bit being set'. If 'TSI bit is not set and if
+	 * data frame appears here, means error condition. In both the cases
+	 * release the DDP context (ddp_put) and in error case, as well
+	 * initiate error recovery mechanism.
+	 */
+	ep = fc_seq_exch(seq);
+	if (cmd->was_ddp_setup) {
+		BUG_ON(!ep);
+		lport = ep->lp;	/* lport is only assigned and used on DDP paths */
+		BUG_ON(!lport);
+	}
+	if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) {
+		f_ctl = ntoh24(fh->fh_f_ctl);
+		/*
+		 * If TSI bit set in f_ctl, means last write data frame is
+		 * received successfully where payload is posted directly
+		 * to user buffer and only the last frame's header is posted
+		 * in legacy receive queue
+		 */
+		if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */
+			cmd->write_data_len = lport->tt.ddp_done(lport,
+								ep->xid);
+			goto last_frame;
+		} else {
+			/*
+			 * Updating the write_data_len may be meaningless at
+			 * this point, but just in case if required in future
+			 * for debugging or any other purpose
+			 */
+			printk(KERN_ERR "%s: Received frame with TSI bit not"
+					" being SET, dropping the frame, "
+					"cmd->sg <%p>, cmd->sg_cnt <0x%x>\n",
+					__func__, cmd->sg, cmd->sg_cnt);
+			cmd->write_data_len = lport->tt.ddp_done(lport,
+							      ep->xid);
+			lport->tt.seq_exch_abort(cmd->seq, 0);
+			goto drop;
+		}
+	}
+
+	rel_off = ntohl(fh->fh_parm_offset);
+	frame_len = fr_len(fp);
+	if (frame_len <= sizeof(*fh))
+		goto drop;	/* header only: nothing to copy */
+	frame_len -= sizeof(*fh);
+	from = fc_frame_payload_get(fp, 0);
+	if (rel_off >= se_cmd->data_length)
+		goto drop;
+	if (frame_len + rel_off > se_cmd->data_length)
+		frame_len = se_cmd->data_length - rel_off;	/* clamp to buffer */
+
+	/*
+	 * Setup to use first mem list entry if any.
+	 */
+	if (task->t_tasks_se_num) {
+		mem = list_first_entry(task->t_mem_list,
+				       struct se_mem, se_list);
+		mem_len = mem->se_len;
+		mem_off = mem->se_off;
+		page = mem->se_page;
+	} else {
+		mem = NULL;
+		page = NULL;
+		mem_off = 0;
+		mem_len = se_cmd->data_length;	/* whole linear buffer, as in ft_queue_data_in() */
+	}
+
+	while (frame_len) {
+		if (!mem_len) {	/* current segment used up: step to the next */
+			BUG_ON(!mem);
+			mem = list_entry(mem->se_list.next,
+					 struct se_mem, se_list);
+			mem_len = mem->se_len;
+			mem_off = mem->se_off;
+			page = mem->se_page;
+		}
+		if (rel_off >= mem_len) {	/* offset lies beyond this segment */
+			rel_off -= mem_len;
+			mem_len = 0;
+			continue;
+		}
+		mem_off += rel_off;	/* apply what is left of the offset once */
+		mem_len -= rel_off;
+		rel_off = 0;
+
+		tlen = min(mem_len, frame_len);
+
+		if (mem) {	/* copy into a mapped page, one page at most */
+			to = kmap_atomic(page + (mem_off >> PAGE_SHIFT),
+					 KM_SOFTIRQ0);
+			page_addr = to;
+			to += mem_off & ~PAGE_MASK;
+			tlen = min(tlen, (size_t)(PAGE_SIZE -
+						(mem_off & ~PAGE_MASK)));
+			memcpy(to, from, tlen);
+			kunmap_atomic(page_addr, KM_SOFTIRQ0);
+		} else {	/* copy straight into the linear task buffer */
+			to = task->t_task_buf + mem_off;
+			memcpy(to, from, tlen);
+		}
+		from += tlen;
+		frame_len -= tlen;
+		mem_off += tlen;
+		mem_len -= tlen;
+		cmd->write_data_len += tlen;
+	}
+last_frame:
+	if (cmd->write_data_len == se_cmd->data_length)
+		transport_generic_handle_data(se_cmd);	/* all data has arrived */
+drop:
+	fc_frame_free(fp);	/* the frame is freed on every path */
+}
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
new file mode 100644
index 000000000000..a3bd57f2ea32
--- /dev/null
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* XXX TBD some includes may be extraneous */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/version.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/configfs.h>
+#include <linux/ctype.h>
+#include <linux/hash.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/kref.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libfc.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_base.h>
+#include <target/configfs_macros.h>
+
+#include <scsi/libfc.h>
+#include "tcm_fc.h"
+
+static void ft_sess_delete_all(struct ft_tport *);
+
+/*
+ * Lookup or allocate target local port; NULL if no TPG or no memory.
+ * Caller holds ft_lport_lock.
+ */
+static struct ft_tport *ft_tport_create(struct fc_lport *lport)
+{
+	struct ft_tpg *tpg;
+	struct ft_tport *tport;
+	int i;
+
+	tport = rcu_dereference(lport->prov[FC_TYPE_FCP]);
+	if (tport && tport->tpg)
+		return tport;	/* already exists and is bound to a TPG */
+
+	tpg = ft_lport_find_tpg(lport);
+	if (!tpg)
+		return NULL;
+
+	if (tport) {
+		tport->tpg = tpg;	/* existing tport had no TPG: bind it */
+		return tport;
+	}
+
+	tport = kzalloc(sizeof(*tport), GFP_KERNEL);
+	if (!tport)
+		return NULL;
+
+	tport->lport = lport;
+	tport->tpg = tpg;
+	tpg->tport = tport;	/* back-pointer from the TPG */
+	for (i = 0; i < FT_SESS_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&tport->hash[i]);
+
+	rcu_assign_pointer(lport->prov[FC_TYPE_FCP], tport);	/* publish last */
+	return tport;
+}
+
+/*
+ * Free tport via RCU; queued by ft_tport_delete() through call_rcu().
+ */
+static void ft_tport_rcu_free(struct rcu_head *rcu)
+{
+	struct ft_tport *tport = container_of(rcu, struct ft_tport, rcu);
+
+	kfree(tport);
+}
+
+/*
+ * Delete a target local port and all of its sessions.
+ * Caller holds ft_lport_lock.
+ */
+static void ft_tport_delete(struct ft_tport *tport)
+{
+	struct fc_lport *lport;
+	struct ft_tpg *tpg;
+
+	ft_sess_delete_all(tport);
+	lport = tport->lport;
+	BUG_ON(tport != lport->prov[FC_TYPE_FCP]);
+	rcu_assign_pointer(lport->prov[FC_TYPE_FCP], NULL);	/* unpublish */
+
+	tpg = tport->tpg;
+	if (tpg) {	/* break the links in both directions */
+		tpg->tport = NULL;
+		tport->tpg = NULL;
+	}
+	call_rcu(&tport->rcu, ft_tport_rcu_free);	/* deferred kfree() */
+}
+
+/*
+ * Add local port; takes ft_lport_lock around ft_tport_create().
+ * Called thru fc_lport_iterate() and from ft_lport_notify(); arg is unused.
+ */
+void ft_lport_add(struct fc_lport *lport, void *arg)
+{
+	mutex_lock(&ft_lport_lock);
+	ft_tport_create(lport);	/* result (possibly NULL) is ignored */
+	mutex_unlock(&ft_lport_lock);
+}
+
+/*
+ * Delete local port's tport, if it has one.
+ * Called thru fc_lport_iterate() and from ft_lport_notify(); arg is unused.
+ */
+void ft_lport_del(struct fc_lport *lport, void *arg)
+{
+	struct ft_tport *tport;
+
+	mutex_lock(&ft_lport_lock);
+	tport = lport->prov[FC_TYPE_FCP];
+	if (tport)
+		ft_tport_delete(tport);
+	mutex_unlock(&ft_lport_lock);
+}
+
+/*
+ * Notification of local port change from libfc.
+ * Create or delete the tport for the lport passed in arg.
+ */
+int ft_lport_notify(struct notifier_block *nb, unsigned long event, void *arg)
+{
+	struct fc_lport *lport = arg;
+
+	switch (event) {	/* any other event value is ignored */
+	case FC_LPORT_EV_ADD:
+		ft_lport_add(lport, NULL);
+		break;
+	case FC_LPORT_EV_DEL:
+		ft_lport_del(lport, NULL);
+		break;
+	}
+	return NOTIFY_DONE;	/* returned for every event */
+}
+
+/*
+ * Hash function for FC_IDs; result indexes tport->hash[].
+ */
+static u32 ft_sess_hash(u32 port_id)
+{
+	return hash_32(port_id, FT_SESS_HASH_BITS);
+}
+
+/*
+ * Find session in local port; returns NULL if there is none.
+ * Sessions and hash lists are RCU-protected.
+ * A reference is taken which must be eventually dropped (ft_sess_put()).
+ */
+static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
+{
+	struct ft_tport *tport;
+	struct hlist_head *head;
+	struct hlist_node *pos;
+	struct ft_sess *sess;
+
+	rcu_read_lock();
+	tport = rcu_dereference(lport->prov[FC_TYPE_FCP]);
+	if (!tport)
+		goto out;	/* no tport for this lport */
+
+	head = &tport->hash[ft_sess_hash(port_id)];
+	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+		if (sess->port_id == port_id) {
+			kref_get(&sess->kref);	/* taken before leaving RCU */
+			rcu_read_unlock();
+			FT_SESS_DBG("port_id %x found %p\n", port_id, sess);
+			return sess;
+		}
+	}
+out:
+	rcu_read_unlock();
+	FT_SESS_DBG("port_id %x not found\n", port_id);
+	return NULL;
+}
+
+/*
+ * Allocate session and enter it in the hash for the local port, or return
+ * the session already hashed for port_id. Caller holds ft_lport_lock.
+ */
+static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
+				      struct ft_node_acl *acl)
+{
+	struct ft_sess *sess;
+	struct hlist_head *head;
+	struct hlist_node *pos;
+
+	head = &tport->hash[ft_sess_hash(port_id)];
+	hlist_for_each_entry_rcu(sess, pos, head, hash)
+		if (sess->port_id == port_id)
+			return sess;	/* existing session; no extra ref taken */
+
+	sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+	if (!sess)
+		return NULL;
+
+	sess->se_sess = transport_init_session();
+	if (!sess->se_sess) {
+		kfree(sess);
+		return NULL;
+	}
+	sess->se_sess->se_node_acl = &acl->se_node_acl;
+	sess->tport = tport;
+	sess->port_id = port_id;
+	kref_init(&sess->kref);	/* ref for table entry */
+	hlist_add_head_rcu(&sess->hash, head);
+	tport->sess_count++;
+
+	FT_SESS_DBG("port_id %x sess %p\n", port_id, sess);
+
+	transport_register_session(&tport->tpg->se_tpg, &acl->se_node_acl,
+				   sess->se_sess, sess);
+	return sess;
+}
+
+/*
+ * Unhash the session and mark it as no longer logged in.
+ * Caller holds ft_lport_lock.
+ */
+static void ft_sess_unhash(struct ft_sess *sess)
+{
+	struct ft_tport *tport = sess->tport;
+
+	hlist_del_rcu(&sess->hash);
+	BUG_ON(!tport->sess_count);
+	tport->sess_count--;
+	sess->port_id = -1;	/* ft_sess_logged_in() tests for this value */
+	sess->params = 0;	/* ft_prli_locked() treats 0 as "not yet counted" */
+}
+
+/*
+ * Delete session from hash; returns it (still referenced) or NULL.
+ * Caller holds ft_lport_lock.
+ */
+static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
+{
+	struct hlist_head *head;
+	struct hlist_node *pos;
+	struct ft_sess *sess;
+
+	head = &tport->hash[ft_sess_hash(port_id)];
+	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+		if (sess->port_id == port_id) {
+			ft_sess_unhash(sess);
+			return sess;	/* caller drops the table reference */
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Delete all sessions from tport: unhash, deregister, drop the table ref.
+ * Caller holds ft_lport_lock.
+ */
+static void ft_sess_delete_all(struct ft_tport *tport)
+{
+	struct hlist_head *head;
+	struct hlist_node *pos;
+	struct ft_sess *sess;
+
+	for (head = tport->hash;
+	     head < &tport->hash[FT_SESS_HASH_SIZE]; head++) {
+		/* NOTE(review): entries are unlinked and put while iterating; confirm this is safe */
+		hlist_for_each_entry_rcu(sess, pos, head, hash) {
+			ft_sess_unhash(sess);
+			transport_deregister_session_configfs(sess->se_sess);
+			ft_sess_put(sess);	/* release from table */
+		}
+	}
+}
+
+/*
+ * TCM ops for sessions.
+ */
+
+/*
+ * Determine whether session is allowed to be shutdown in the current context.
+ * Returns non-zero if the session should be shutdown; this one always does.
+ */
+int ft_sess_shutdown(struct se_session *se_sess)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+
+	FT_SESS_DBG("port_id %x\n", sess->port_id);	/* sess only used here */
+	return 1;
+}
+
+/*
+ * Unhash the session if still hashed, deregister it and drop the table ref.
+ * Called on ACL deletion or queue depth change; sending PRLO is still TBD.
+ */
+void ft_sess_close(struct se_session *se_sess)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+	struct fc_lport *lport;
+	u32 port_id;
+
+	mutex_lock(&ft_lport_lock);
+	lport = sess->tport->lport;
+	port_id = sess->port_id;
+	if (port_id == -1) {	/* already unhashed by ft_sess_unhash() */
+		mutex_unlock(&ft_lport_lock);	/* release, do not re-take, the lock */
+		return;
+	}
+	FT_SESS_DBG("port_id %x\n", port_id);
+	ft_sess_unhash(sess);
+	mutex_unlock(&ft_lport_lock);
+	transport_deregister_session_configfs(se_sess);
+	ft_sess_put(sess);	/* drop the reference held for the table entry */
+	/* XXX Send LOGO or PRLO */
+	synchronize_rcu();		/* let transport deregister happen */
+}
+
+void ft_sess_stop(struct se_session *se_sess, int sess_sleep, int conn_sleep)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+
+	FT_SESS_DBG("port_id %x\n", sess->port_id);	/* debug trace only */
+}
+
+int ft_sess_logged_in(struct se_session *se_sess)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+
+	return sess->port_id != -1;	/* ft_sess_unhash() sets port_id to -1 */
+}
+
+u32 ft_sess_get_index(struct se_session *se_sess)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+
+	return sess->port_id;	/* XXX TBD probably not what is needed */
+}
+
+u32 ft_sess_get_port_name(struct se_session *se_sess,
+			  unsigned char *buf, u32 len)
+{
+	struct ft_sess *sess = se_sess->fabric_sess_ptr;
+
+	return ft_format_wwn(buf, len, sess->port_name);	/* result passed through */
+}
+
+void ft_sess_set_erl0(struct se_session *se_sess)
+{	/* currently a no-op; installed as .fall_back_to_erl0 */
+	/* XXX TBD called when out of memory */
+}
+
+/*
+ * libfc ops involving sessions.
+ */
+
+static int ft_prli_locked(struct fc_rport_priv *rdata, u32 spp_len,
+			  const struct fc_els_spp *rspp, struct fc_els_spp *spp)
+{	/* PRLI worker; ft_prli() calls this with ft_lport_lock held */
+	struct ft_tport *tport;
+	struct ft_sess *sess;
+	struct ft_node_acl *acl;
+	u32 fcp_parm;
+
+	tport = ft_tport_create(rdata->local_port);
+	if (!tport)
+		return 0;	/* not a target for this local port */
+
+	acl = ft_acl_get(tport->tpg, rdata);
+	if (!acl)
+		return 0;	/* no ACL found for this remote port */
+
+	if (!rspp)
+		goto fill;	/* no received page: only fill in our parameters */
+
+	if (rspp->spp_flags & (FC_SPP_OPA_VAL | FC_SPP_RPA_VAL))
+		return FC_SPP_RESP_NO_PA;
+
+	/*
+	 * If both target and initiator bits are off, the SPP is invalid.
+	 */
+	fcp_parm = ntohl(rspp->spp_params);
+	if (!(fcp_parm & (FCP_SPPF_INIT_FCN | FCP_SPPF_TARG_FCN)))
+		return FC_SPP_RESP_INVL;
+
+	/*
+	 * Create session (image pair) only if requested by
+	 * EST_IMG_PAIR flag and if the requestor is an initiator.
+	 */
+	if (rspp->spp_flags & FC_SPP_EST_IMG_PAIR) {
+		spp->spp_flags |= FC_SPP_EST_IMG_PAIR;
+		if (!(fcp_parm & FCP_SPPF_INIT_FCN))
+			return FC_SPP_RESP_CONF;
+		sess = ft_sess_create(tport, rdata->ids.port_id, acl);
+		if (!sess)
+			return FC_SPP_RESP_RES;
+		if (!sess->params)	/* zero on a new or re-hashed session */
+			rdata->prli_count++;
+		sess->params = fcp_parm;
+		sess->port_name = rdata->ids.port_name;
+		sess->max_frame = rdata->maxframe_size;
+
+		/* XXX TBD - clearing actions.  unit attn, see 4.10 */
+	}
+
+	/*
+	 * OR in our service parameters with other provider (initiator), if any.
+	 * TBD XXX - indicate RETRY capability?
+	 */
+fill:
+	fcp_parm = ntohl(spp->spp_params);
+	spp->spp_params = htonl(fcp_parm | FCP_SPPF_TARG_FCN);
+	return FC_SPP_RESP_ACK;
+}
+
+/**
+ * ft_prli() - Handle incoming or outgoing PRLI for the FCP target
+ * @rdata: remote port private
+ * @spp_len: service parameter page length
+ * @rspp: received service parameter page (NULL for outgoing PRLI)
+ * @spp: response service parameter page
+ *
+ * Returns spp response code.
+ */
+static int ft_prli(struct fc_rport_priv *rdata, u32 spp_len,
+		   const struct fc_els_spp *rspp, struct fc_els_spp *spp)
+{
+	int ret;
+
+	mutex_lock(&ft_lport_lock);	/* ft_prli_locked() runs under this lock */
+	ret = ft_prli_locked(rdata, spp_len, rspp, spp);
+	mutex_unlock(&ft_lport_lock);
+	FT_SESS_DBG("port_id %x flags %x ret %x\n",
+	       rdata->ids.port_id, rspp ? rspp->spp_flags : 0, ret);
+	return ret;
+}
+
+static void ft_sess_rcu_free(struct rcu_head *rcu)	/* call_rcu() callback */
+{
+	struct ft_sess *sess = container_of(rcu, struct ft_sess, rcu);
+
+	transport_deregister_session(sess->se_sess);
+	kfree(sess);
+}
+
+static void ft_sess_free(struct kref *kref)	/* kref release function */
+{
+	struct ft_sess *sess = container_of(kref, struct ft_sess, kref);
+
+	call_rcu(&sess->rcu, ft_sess_rcu_free);	/* actual free is deferred */
+}
+
+void ft_sess_put(struct ft_sess *sess)	/* drop one session reference */
+{
+	int sess_held = atomic_read(&sess->kref.refcount);
+
+	BUG_ON(!sess_held);	/* a put with a zero refcount is a bug */
+	kref_put(&sess->kref, ft_sess_free);
+}
+
+static void ft_prlo(struct fc_rport_priv *rdata)	/* .prlo hook in ft_prov */
+{
+	struct ft_sess *sess;
+	struct ft_tport *tport;
+
+	mutex_lock(&ft_lport_lock);
+	tport = rcu_dereference(rdata->local_port->prov[FC_TYPE_FCP]);
+	if (!tport) {
+		mutex_unlock(&ft_lport_lock);
+		return;	/* no tport on this lport */
+	}
+	sess = ft_sess_delete(tport, rdata->ids.port_id);
+	if (!sess) {
+		mutex_unlock(&ft_lport_lock);
+		return;	/* no session hashed for this port_id */
+	}
+	mutex_unlock(&ft_lport_lock);
+	transport_deregister_session_configfs(sess->se_sess);
+	ft_sess_put(sess);		/* release from table */
+	rdata->prli_count--;	/* done after the lock has been dropped */
+	/* XXX TBD - clearing actions.  unit attn, see 4.10 */
+}
+
+/*
+ * Handle incoming FCP request; frames with no matching session are freed.
+ * Caller has verified that the frame is type FCP.
+ */
+static void ft_recv(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct ft_sess *sess;
+	u32 sid = fc_frame_sid(fp);
+
+	FT_SESS_DBG("sid %x\n", sid);
+
+	sess = ft_sess_get(lport, sid);	/* takes a session reference */
+	if (!sess) {
+		FT_SESS_DBG("sid %x sess lookup failed\n", sid);
+		/* TBD XXX - if FCP_CMND, send PRLO */
+		fc_frame_free(fp);
+		return;
+	}
+	ft_recv_req(sess, fp);	/* must do ft_sess_put() */
+}
+
+/*
+ * Provider ops for libfc; registered for FC_TYPE_FCP in ft_init().
+ */
+struct fc4_prov ft_prov = {
+	.prli = ft_prli,
+	.prlo = ft_prlo,
+	.recv = ft_recv,
+	.module = THIS_MODULE,
+};
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 47f8cdb207f1..74273e638c0d 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -1658,8 +1658,12 @@ static void gsm_queue(struct gsm_mux *gsm)
 
 	if ((gsm->control & ~PF) == UI)
 		gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len);
-	/* generate final CRC with received FCS */
-	gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs);
+	if (gsm->encoding == 0){
+		/* WARNING: gsm->received_fcs is used for gsm->encoding = 0 only.
+		            In this case it contain the last piece of data
+		            required to generate final CRC */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs);
+	}
 	if (gsm->fcs != GOOD_FCS) {
 		gsm->bad_fcs++;
 		if (debug & 4)
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 80484af781e1..b1f0f83b870d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1391,6 +1391,14 @@ config SERIAL_OF_PLATFORM_NWPSERIAL_CONSOLE
 	help
 	  Support for Console on the NWP serial ports.
 
+config SERIAL_LANTIQ
+	bool "Lantiq serial driver"
+	depends on LANTIQ
+	select SERIAL_CORE
+	select SERIAL_CORE_CONSOLE
+	help
+	  Support for console and UART on Lantiq SoCs.
+
 config SERIAL_QE
 	tristate "Freescale QUICC Engine serial port support"
 	depends on QUICC_ENGINE
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index fee0690ef8e3..35276043d9d1 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -94,3 +94,4 @@ obj-$(CONFIG_SERIAL_IFX6X60)  	+= ifx6x60.o
 obj-$(CONFIG_SERIAL_PCH_UART)	+= pch_uart.o
 obj-$(CONFIG_SERIAL_MSM_SMD)	+= msm_smd_tty.o
 obj-$(CONFIG_SERIAL_MXS_AUART) += mxs-auart.o
+obj-$(CONFIG_SERIAL_LANTIQ)	+= lantiq.o
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index cb36b0d4ef3c..62df72d9f0aa 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -382,12 +382,13 @@ static void imx_start_tx(struct uart_port *port)
 static irqreturn_t imx_rtsint(int irq, void *dev_id)
 {
 	struct imx_port *sport = dev_id;
-	unsigned int val = readl(sport->port.membase + USR1) & USR1_RTSS;
+	unsigned int val;
 	unsigned long flags;
 
 	spin_lock_irqsave(&sport->port.lock, flags);
 
 	writel(USR1_RTSD, sport->port.membase + USR1);
+	val = readl(sport->port.membase + USR1) & USR1_RTSS;
 	uart_handle_cts_change(&sport->port, !!val);
 	wake_up_interruptible(&sport->port.state->port.delta_msr_wait);
 
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
new file mode 100644
index 000000000000..58cf279ed879
--- /dev/null
+++ b/drivers/tty/serial/lantiq.c
@@ -0,0 +1,756 @@
+/*
+ *  Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright (C) 2004 Infineon IFAP DC COM CPE
+ * Copyright (C) 2007 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2007 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 Thomas Langer, <thomas.langer@lantiq.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/device.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+
+#include <lantiq_soc.h>
+
+#define PORT_LTQ_ASC		111
+#define MAXPORTS		2	/* entries in lqasc_port[], also uart_driver.nr */
+#define UART_DUMMY_UER_RX	1	/* software-only flag OR-ed into each rx status */
+#define DRVNAME			"ltq_asc"
+#ifdef __BIG_ENDIAN	/* byte-wide ltq_r8()/ltq_w8() access; +3 presumably selects the data byte - confirm */
+#define LTQ_ASC_TBUF		(0x0020 + 3)
+#define LTQ_ASC_RBUF		(0x0024 + 3)
+#else
+#define LTQ_ASC_TBUF		0x0020
+#define LTQ_ASC_RBUF		0x0024
+#endif
+#define LTQ_ASC_FSTAT		0x0048
+#define LTQ_ASC_WHBSTATE	0x0018
+#define LTQ_ASC_STATE		0x0014
+#define LTQ_ASC_IRNCR		0x00F8
+#define LTQ_ASC_CLC		0x0000
+#define LTQ_ASC_ID		0x0008
+#define LTQ_ASC_PISEL		0x0004
+#define LTQ_ASC_TXFCON		0x0044
+#define LTQ_ASC_RXFCON		0x0040
+#define LTQ_ASC_CON		0x0010
+#define LTQ_ASC_BG		0x0050
+#define LTQ_ASC_IRNREN		0x00F4
+
+#define ASC_IRNREN_TX		0x1
+#define ASC_IRNREN_RX		0x2
+#define ASC_IRNREN_ERR		0x4
+#define ASC_IRNREN_TX_BUF	0x8
+#define ASC_IRNCR_TIR		0x1
+#define ASC_IRNCR_RIR		0x2
+#define ASC_IRNCR_EIR		0x4
+
+#define ASCOPT_CSIZE		0x3
+#define TXFIFO_FL		1
+#define RXFIFO_FL		1
+#define ASCCLC_DISS		0x2
+#define ASCCLC_RMCMASK		0x0000FF00
+#define ASCCLC_RMCOFFSET	8
+#define ASCCON_M_8ASYNC		0x0
+#define ASCCON_M_7ASYNC		0x2
+#define ASCCON_ODD		0x00000020
+#define ASCCON_STP		0x00000080
+#define ASCCON_BRS		0x00000100
+#define ASCCON_FDE		0x00000200
+#define ASCCON_R		0x00008000
+#define ASCCON_FEN		0x00020000
+#define ASCCON_ROEN		0x00080000
+#define ASCCON_TOEN		0x00100000
+#define ASCSTATE_PE		0x00010000
+#define ASCSTATE_FE		0x00020000
+#define ASCSTATE_ROE		0x00080000
+#define ASCSTATE_ANY		(ASCSTATE_ROE|ASCSTATE_PE|ASCSTATE_FE)
+#define ASCWHBSTATE_CLRREN	0x00000001
+#define ASCWHBSTATE_SETREN	0x00000002
+#define ASCWHBSTATE_CLRPE	0x00000004
+#define ASCWHBSTATE_CLRFE	0x00000008
+#define ASCWHBSTATE_CLRROE	0x00000020
+#define ASCTXFCON_TXFEN		0x0001
+#define ASCTXFCON_TXFFLU	0x0002
+#define ASCTXFCON_TXFITLMASK	0x3F00
+#define ASCTXFCON_TXFITLOFF	8
+#define ASCRXFCON_RXFEN		0x0001
+#define ASCRXFCON_RXFFLU	0x0002
+#define ASCRXFCON_RXFITLMASK	0x3F00
+#define ASCRXFCON_RXFITLOFF	8
+#define ASCFSTAT_RXFFLMASK	0x003F
+#define ASCFSTAT_TXFFLMASK	0x3F00
+#define ASCFSTAT_TXFREEMASK	0x3F000000
+#define ASCFSTAT_TXFREEOFF	24
+
+static void lqasc_tx_chars(struct uart_port *port);
+static struct ltq_uart_port *lqasc_port[MAXPORTS];
+static struct uart_driver lqasc_reg;
+static DEFINE_SPINLOCK(ltq_asc_lock);
+
+struct ltq_uart_port {
+	struct uart_port	port;		/* serial-core port; container_of() anchor */
+	struct clk		*clk;		/* "fpi" clock, source of port.uartclk */
+	unsigned int		tx_irq;		/* served by lqasc_tx_int() */
+	unsigned int		rx_irq;		/* served by lqasc_rx_int() */
+	unsigned int		err_irq;	/* served by lqasc_err_int() */
+};
+
+/* Map an embedded uart_port back to the ltq_uart_port that contains it. */
+static inline struct ltq_uart_port *to_ltq_uart_port(struct uart_port *port)
+{
+	return container_of(port, struct ltq_uart_port, port);
+}
+
+static void
+lqasc_stop_tx(struct uart_port *port)
+{
+	/* deliberately empty: no register is written to halt transmission */
+}
+
+/* Push pending tx data into the FIFO while holding the driver-wide lock. */
+static void lqasc_start_tx(struct uart_port *port)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ltq_asc_lock, irqflags);
+	lqasc_tx_chars(port);
+	spin_unlock_irqrestore(&ltq_asc_lock, irqflags);
+}
+
+/* Write CLRREN to WHBSTATE (counterpart of the SETREN "enable rx" write). */
+static void lqasc_stop_rx(struct uart_port *port)
+{
+	ltq_w32(ASCWHBSTATE_CLRREN, port->membase + LTQ_ASC_WHBSTATE);
+}
+
+static void
+lqasc_enable_ms(struct uart_port *port)
+{	/* empty on purpose: this driver implements no modem-status handling */
+}
+
+/* Drain the rx FIFO into the tty flip buffer, counting and clearing rx
+ * errors on the way.  Returns 0, or -EBUSY if tty_port_tty_get() finds no tty.
+ */
+static int
+lqasc_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	unsigned int ch = 0, rsr = 0, fifocnt, received;
+
+	if (!tty) {
+		dev_dbg(port->dev, "%s:tty is busy now\n", __func__);
+		return -EBUSY;
+	}
+	fifocnt =
+		ltq_r32(port->membase + LTQ_ASC_FSTAT) & ASCFSTAT_RXFFLMASK;
+	received = fifocnt;
+	while (fifocnt--) {
+		u8 flag = TTY_NORMAL;
+		ch = ltq_r8(port->membase + LTQ_ASC_RBUF);
+		rsr = (ltq_r32(port->membase + LTQ_ASC_STATE)
+			& ASCSTATE_ANY) | UART_DUMMY_UER_RX;
+		port->icount.rx++;
+
+		/* error handling is kept out of the main execution path */
+		if (rsr & ASCSTATE_ANY) {
+			if (rsr & ASCSTATE_PE) {
+				port->icount.parity++;
+				ltq_w32_mask(0, ASCWHBSTATE_CLRPE,
+					port->membase + LTQ_ASC_WHBSTATE);
+			} else if (rsr & ASCSTATE_FE) {
+				port->icount.frame++;
+				ltq_w32_mask(0, ASCWHBSTATE_CLRFE,
+					port->membase + LTQ_ASC_WHBSTATE);
+			}
+			if (rsr & ASCSTATE_ROE) {
+				port->icount.overrun++;
+				ltq_w32_mask(0, ASCWHBSTATE_CLRROE,
+					port->membase + LTQ_ASC_WHBSTATE);
+			}
+
+			rsr &= port->read_status_mask;
+
+			if (rsr & ASCSTATE_PE)
+				flag = TTY_PARITY;
+			else if (rsr & ASCSTATE_FE)
+				flag = TTY_FRAME;
+		}
+
+		if ((rsr & port->ignore_status_mask) == 0)
+			tty_insert_flip_char(tty, ch, flag);
+
+		if (rsr & ASCSTATE_ROE)
+			/*
+			 * Overrun is special: it is reported immediately and
+			 * doesn't affect the current character
+			 */
+			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+	}
+	/* one push per batch; it must not depend on the value of the last byte */
+	if (received)
+		tty_flip_buffer_push(tty);
+	tty_kref_put(tty);
+	return 0;
+}
+
+/* Feed x_char and the circular tx buffer into the FIFO while it has room. */
+static void lqasc_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *ring = &port->state->xmit;
+	void __iomem *fstat = port->membase + LTQ_ASC_FSTAT;
+	void __iomem *tbuf = port->membase + LTQ_ASC_TBUF;
+
+	if (uart_tx_stopped(port)) {
+		lqasc_stop_tx(port);
+		return;
+	}
+
+	/* loop for as long as the TXFREE field of FSTAT is non-zero */
+	while ((ltq_r32(fstat) & ASCFSTAT_TXFREEMASK) >> ASCFSTAT_TXFREEOFF) {
+		if (port->x_char) {
+			/* a pending x_char goes out ahead of buffered data */
+			ltq_w8(port->x_char, tbuf);
+			port->x_char = 0;
+		} else if (!uart_circ_empty(ring)) {
+			ltq_w8(ring->buf[ring->tail], tbuf);
+			ring->tail = (ring->tail + 1) & (UART_XMIT_SIZE - 1);
+		} else {
+			break;
+		}
+		port->icount.tx++;
+	}
+
+	if (uart_circ_chars_pending(ring) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+}
+
+/* tx interrupt: write TIR to IRNCR, then refill the FIFO via start_tx. */
+static irqreturn_t lqasc_tx_int(int irq, void *_port)
+{
+	struct uart_port *port = _port;
+	unsigned long irqflags;
+	spin_lock_irqsave(&ltq_asc_lock, irqflags);
+	ltq_w32(ASC_IRNCR_TIR, port->membase + LTQ_ASC_IRNCR);
+	spin_unlock_irqrestore(&ltq_asc_lock, irqflags);
+	lqasc_start_tx(port);	/* takes ltq_asc_lock itself, so call it unlocked */
+	return IRQ_HANDLED;
+}
+
+/* error interrupt: set all three error-clear bits in WHBSTATE. */
+static irqreturn_t lqasc_err_int(int irq, void *_port)
+{
+	const u32 clear_bits = ASCWHBSTATE_CLRPE | ASCWHBSTATE_CLRFE |
+			       ASCWHBSTATE_CLRROE;
+	struct uart_port *port = _port;
+	unsigned long irqflags;
+	spin_lock_irqsave(&ltq_asc_lock, irqflags);
+	ltq_w32_mask(0, clear_bits, port->membase + LTQ_ASC_WHBSTATE);
+	spin_unlock_irqrestore(&ltq_asc_lock, irqflags);
+	return IRQ_HANDLED;
+}
+
+/* rx interrupt: write RIR to IRNCR and drain the FIFO, all under the lock. */
+static irqreturn_t lqasc_rx_int(int irq, void *_port)
+{
+	struct uart_port *port = _port;
+	unsigned long irqflags;
+	spin_lock_irqsave(&ltq_asc_lock, irqflags);
+	ltq_w32(ASC_IRNCR_RIR, port->membase + LTQ_ASC_IRNCR);
+	lqasc_rx_chars(port);
+	spin_unlock_irqrestore(&ltq_asc_lock, irqflags);
+	return IRQ_HANDLED;
+}
+
+/* Report TIOCSER_TEMT when the TXFFL field of FSTAT reads zero, else 0. */
+static unsigned int lqasc_tx_empty(struct uart_port *port)
+{
+	if (ltq_r32(port->membase + LTQ_ASC_FSTAT) & ASCFSTAT_TXFFLMASK)
+		return 0;
+	return TIOCSER_TEMT;
+}
+
+/* No modem lines are read: carrier, DSR and CTS are always reported as set. */
+static unsigned int lqasc_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+static void
+lqasc_set_mctrl(struct uart_port *port, u_int mctrl)
+{	/* empty on purpose: the requested mctrl bits are ignored */
+}
+
+static void
+lqasc_break_ctl(struct uart_port *port, int break_state)
+{	/* empty on purpose: break_state is ignored, no break is generated */
+}
+
+static int
+lqasc_startup(struct uart_port *port)
+{
+	struct ltq_uart_port *ltq_port = to_ltq_uart_port(port);
+	int retval;
+
+	port->uartclk = clk_get_rate(ltq_port->clk);
+	/* Bring-up order: CLC (clear DISS, RMC = 1), FIFOs, CON, then the irqs. */
+	ltq_w32_mask(ASCCLC_DISS | ASCCLC_RMCMASK, (1 << ASCCLC_RMCOFFSET),
+		port->membase + LTQ_ASC_CLC);
+	/* zero PISEL, then enable + flush each FIFO with its *_FL level field */
+	ltq_w32(0, port->membase + LTQ_ASC_PISEL);
+	ltq_w32(
+		((TXFIFO_FL << ASCTXFCON_TXFITLOFF) & ASCTXFCON_TXFITLMASK) |
+		ASCTXFCON_TXFEN | ASCTXFCON_TXFFLU,
+		port->membase + LTQ_ASC_TXFCON);
+	ltq_w32(
+		((RXFIFO_FL << ASCRXFCON_RXFITLOFF) & ASCRXFCON_RXFITLMASK)
+		| ASCRXFCON_RXFEN | ASCRXFCON_RXFFLU,
+		port->membase + LTQ_ASC_RXFCON);
+	/* make sure other settings are written to hardware before
+	 * setting enable bits
+	 */
+	wmb();
+	ltq_w32_mask(0, ASCCON_M_8ASYNC | ASCCON_FEN | ASCCON_TOEN |
+		ASCCON_ROEN, port->membase + LTQ_ASC_CON);
+	/* request tx, rx and err handlers; a failure frees those already taken */
+	retval = request_irq(ltq_port->tx_irq, lqasc_tx_int,
+		IRQF_DISABLED, "asc_tx", port);
+	if (retval) {
+		pr_err("failed to request lqasc_tx_int\n");
+		return retval;
+	}
+
+	retval = request_irq(ltq_port->rx_irq, lqasc_rx_int,
+		IRQF_DISABLED, "asc_rx", port);
+	if (retval) {
+		pr_err("failed to request lqasc_rx_int\n");
+		goto err1;
+	}
+
+	retval = request_irq(ltq_port->err_irq, lqasc_err_int,
+		IRQF_DISABLED, "asc_err", port);
+	if (retval) {
+		pr_err("failed to request lqasc_err_int\n");
+		goto err2;
+	}
+	/* all handlers are installed: write the RX, ERR and TX bits to IRNREN */
+	ltq_w32(ASC_IRNREN_RX | ASC_IRNREN_ERR | ASC_IRNREN_TX,
+		port->membase + LTQ_ASC_IRNREN);
+	return 0;
+
+err2:
+	free_irq(ltq_port->rx_irq, port);
+err1:
+	free_irq(ltq_port->tx_irq, port);
+	return retval;	/* NOTE(review): the CLC/FIFO/CON writes above are not undone */
+}
+
+/* Release the three interrupts, zero CON, then disable and flush the FIFOs. */
+static void lqasc_shutdown(struct uart_port *port)
+{
+	struct ltq_uart_port *lp = to_ltq_uart_port(port);
+	void __iomem *regs = port->membase;
+
+	free_irq(lp->tx_irq, port);
+	free_irq(lp->rx_irq, port);
+	free_irq(lp->err_irq, port);
+	ltq_w32(0, regs + LTQ_ASC_CON);
+	/* clear the enable bit and set the flush bit in one read-modify-write */
+	ltq_w32_mask(ASCRXFCON_RXFEN, ASCRXFCON_RXFFLU, regs + LTQ_ASC_RXFCON);
+	ltq_w32_mask(ASCTXFCON_TXFEN, ASCTXFCON_TXFFLU, regs + LTQ_ASC_TXFCON);
+}
+
+/*
+ * Program character size, stop bits, parity sense and baud rate from @new.
+ * Settings the hardware cannot do (CS5/CS6, CMSPAR) are cleared in @new so
+ * that the caller sees what is really in effect.
+ */
+static void
+lqasc_set_termios(struct uart_port *port,
+	struct ktermios *new, struct ktermios *old)
+{
+	unsigned int cflag, iflag;
+	unsigned int divisor, baud;
+	unsigned int con = 0;
+	unsigned long flags;
+
+	iflag = new->c_iflag;
+	switch (new->c_cflag & CSIZE) {
+	case CS7:
+		con = ASCCON_M_7ASYNC;
+		break;
+	case CS5:
+	case CS6:
+	default:
+		new->c_cflag &= ~CSIZE;
+		new->c_cflag |= CS8;
+		con = ASCCON_M_8ASYNC;
+		break;
+	}
+	new->c_cflag &= ~CMSPAR; /* Mark/Space parity is not supported */
+	/* sample c_cflag only now, so it reflects the corrections made above */
+	cflag = new->c_cflag;
+
+	if (cflag & CSTOPB)
+		con |= ASCCON_STP;
+
+	if (cflag & PARENB) {
+		if (!(cflag & PARODD))
+			con &= ~ASCCON_ODD;
+		else
+			con |= ASCCON_ODD;
+	}
+
+	port->read_status_mask = ASCSTATE_ROE;
+	if (iflag & INPCK)
+		port->read_status_mask |= ASCSTATE_FE | ASCSTATE_PE;
+
+	port->ignore_status_mask = 0;
+	if (iflag & IGNPAR)
+		port->ignore_status_mask |= ASCSTATE_FE | ASCSTATE_PE;
+
+	if (iflag & IGNBRK) {
+		/*
+		 * If we're ignoring parity and break indicators,
+		 * ignore overruns too (for real raw support).
+		 */
+		if (iflag & IGNPAR)
+			port->ignore_status_mask |= ASCSTATE_ROE;
+	}
+
+	if ((cflag & CREAD) == 0)
+		port->ignore_status_mask |= UART_DUMMY_UER_RX;
+
+	/* set error signals  - framing, parity  and overrun, enable receiver */
+	con |= ASCCON_FEN | ASCCON_TOEN | ASCCON_ROEN;
+
+	spin_lock_irqsave(&ltq_asc_lock, flags);
+	/* NOTE(review): only ORs into CON; bits set by an earlier call stay set */
+	ltq_w32_mask(0, con, port->membase + LTQ_ASC_CON);
+
+	/* Set baud rate - take a divider of 2 into account */
+	baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16);
+	divisor = uart_get_divisor(port, baud);
+	divisor = divisor / 2 - 1;	/* NOTE(review): wraps if divisor < 2 */
+	/* keep the serial core's FIFO drain timeout in step with the new rate */
+	uart_update_timeout(port, cflag, baud);
+
+	/* disable the baudrate generator */
+	ltq_w32_mask(ASCCON_R, 0, port->membase + LTQ_ASC_CON);
+	/* make sure the fractional divider is off */
+	ltq_w32_mask(ASCCON_FDE, 0, port->membase + LTQ_ASC_CON);
+	/* set up to use divisor of 2 */
+	ltq_w32_mask(ASCCON_BRS, 0, port->membase + LTQ_ASC_CON);
+
+	/* now we can write the new baudrate into the register */
+	ltq_w32(divisor, port->membase + LTQ_ASC_BG);
+
+	/* turn the baudrate generator back on */
+	ltq_w32_mask(0, ASCCON_R, port->membase + LTQ_ASC_CON);
+
+	/* enable rx */
+	ltq_w32(ASCWHBSTATE_SETREN, port->membase + LTQ_ASC_WHBSTATE);
+
+	spin_unlock_irqrestore(&ltq_asc_lock, flags);
+
+	/* Don't rewrite B0 */
+	if (tty_termios_baud_rate(new))
+		tty_termios_encode_baud_rate(new, baud, baud);
+}
+
+/* Name of the port type: DRVNAME for an ASC port, NULL for anything else. */
+static const char *
+lqasc_type(struct uart_port *port)
+{
+	const bool is_asc = (port->type == PORT_LTQ_ASC);
+
+	return is_asc ? DRVNAME : NULL;
+}
+
+static void lqasc_release_port(struct uart_port *port)
+{
+	if (port->flags & UPF_IOREMAP) {
+		/* mapped by devm_ioremap_nocache(), so unmap through devres too */
+		devm_iounmap(port->dev, port->membase);
+		port->membase = NULL;
+	}
+}
+
+/* Claim the ASC register window and, with UPF_IOREMAP set, map it. */
+static int lqasc_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *res;
+	int size;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "cannot obtain I/O memory region\n");
+		return -ENODEV;
+	}
+	size = resource_size(res);
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		size, dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "cannot request I/O memory region\n");
+		return -EBUSY;
+	}
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = devm_ioremap_nocache(&pdev->dev,
+			port->mapbase, size);
+		if (port->membase == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/* On UART_CONFIG_TYPE: mark the port as an ASC and claim its resources. */
+static void lqasc_config_port(struct uart_port *port, int flags)
+{
+	if (!(flags & UART_CONFIG_TYPE))
+		return;
+	port->type = PORT_LTQ_ASC;
+	lqasc_request_port(port);	/* return value is not checked */
+}
+
+/* Valid: type UNKNOWN or LTQ_ASC, 0 <= irq < NR_IRQS, baud_base >= 9600. */
+static int
+lqasc_verify_port(struct uart_port *port, struct serial_struct *ser)
+{
+	const bool type_ok = ser->type == PORT_UNKNOWN ||
+			     ser->type == PORT_LTQ_ASC;
+	const bool irq_ok = ser->irq >= 0 && ser->irq < NR_IRQS;
+	const bool baud_ok = ser->baud_base >= 9600;
+
+	if (type_ok && irq_ok && baud_ok)
+		return 0;
+	return -EINVAL;
+}
+
+static struct uart_ops lqasc_pops = {
+	.tx_empty =	lqasc_tx_empty,
+	.set_mctrl =	lqasc_set_mctrl,	/* empty stub */
+	.get_mctrl =	lqasc_get_mctrl,	/* constant CTS | CAR | DSR */
+	.stop_tx =	lqasc_stop_tx,		/* does nothing */
+	.start_tx =	lqasc_start_tx,
+	.stop_rx =	lqasc_stop_rx,
+	.enable_ms =	lqasc_enable_ms,	/* empty stub */
+	.break_ctl =	lqasc_break_ctl,	/* empty stub */
+	.startup =	lqasc_startup,
+	.shutdown =	lqasc_shutdown,
+	.set_termios =	lqasc_set_termios,
+	.type =		lqasc_type,
+	.release_port =	lqasc_release_port,
+	.request_port =	lqasc_request_port,
+	.config_port =	lqasc_config_port,
+	.verify_port =	lqasc_verify_port,
+};
+
+/* Busy-wait until the tx FIFO has a free entry, then write one character. */
+static void
+lqasc_console_putchar(struct uart_port *port, int ch)
+{
+	if (!port->membase)
+		return;
+
+	/* poll the TXFREE field of FSTAT; there is no timeout */
+	while (((ltq_r32(port->membase + LTQ_ASC_FSTAT)
+		& ASCFSTAT_TXFREEMASK) >> ASCFSTAT_TXFREEOFF) == 0)
+		;
+
+	ltq_w8(ch, port->membase + LTQ_ASC_TBUF);
+}
+
+
+/*
+ * Console output: emit @count characters of @s through the port selected by
+ * co->index, holding ltq_asc_lock with interrupts off for the whole string.
+ */
+static void
+lqasc_console_write(struct console *co, const char *s, u_int count)
+{
+	struct ltq_uart_port *lp;
+	unsigned long irqflags;
+
+	if (co->index >= MAXPORTS)
+		return;
+	lp = lqasc_port[co->index];
+	if (!lp)
+		return;
+
+	spin_lock_irqsave(&ltq_asc_lock, irqflags);
+	uart_console_write(&lp->port, s, count, lqasc_console_putchar);
+	spin_unlock_irqrestore(&ltq_asc_lock, irqflags);
+}
+
+/*
+ * Console setup: default to 115200n8 without flow control unless @options
+ * says otherwise, then hand the settings to uart_set_options().
+ */
+static int __init
+lqasc_console_setup(struct console *co, char *options)
+{
+	struct ltq_uart_port *lp;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index >= MAXPORTS)
+		return -ENODEV;
+	lp = lqasc_port[co->index];
+	if (!lp)
+		return -ENODEV;
+
+	/* lqasc_set_termios() reads port->uartclk, so refresh it first */
+	lp->port.uartclk = clk_get_rate(lp->clk);
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+	return uart_set_options(&lp->port, co, baud, parity, bits, flow);
+}
+
+static struct console lqasc_console = {
+	.name =		"ttyLTQ",		/* same string as lqasc_reg.dev_name */
+	.write =	lqasc_console_write,
+	.device =	uart_console_device,
+	.setup =	lqasc_console_setup,
+	.flags =	CON_PRINTBUFFER,
+	.index =	-1,	/* NOTE(review): presumably "pick from the command line" - confirm */
+	.data =		&lqasc_reg,		/* lqasc_reg.cons points back here */
+};
+
+static int __init
+lqasc_console_init(void)
+{
+	register_console(&lqasc_console);	/* invoked through console_initcall() */
+	return 0;				/* success is reported unconditionally */
+}
+console_initcall(lqasc_console_init);
+
+static struct uart_driver lqasc_reg = {
+	.owner =	THIS_MODULE,
+	.driver_name =	DRVNAME,
+	.dev_name =	"ttyLTQ",	/* same string as lqasc_console.name */
+	.major =	0,	/* NOTE(review): presumably requests a dynamic major - confirm */
+	.minor =	0,
+	.nr =		MAXPORTS,	/* matches the size of lqasc_port[] */
+	.cons =		&lqasc_console,
+};
+
+/* Bind one ASC platform device: gather resources, set up and add the port. */
+static int __init
+lqasc_probe(struct platform_device *pdev)
+{
+	struct ltq_uart_port *ltq_port;
+	struct uart_port *port;
+	struct resource *mmres, *irqres;
+	int tx_irq, rx_irq, err_irq;
+	struct clk *clk;
+	int ret;
+
+	mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!mmres || !irqres)
+		return -ENODEV;
+
+	/* pdev->id indexes lqasc_port[]: refuse negative, too large or taken ids */
+	if (pdev->id < 0 || pdev->id >= MAXPORTS || lqasc_port[pdev->id])
+		return -EBUSY;
+
+	/* look the irqs up before taking the clock, so failing here leaks nothing */
+	tx_irq = platform_get_irq_byname(pdev, "tx");
+	rx_irq = platform_get_irq_byname(pdev, "rx");
+	err_irq = platform_get_irq_byname(pdev, "err");
+	if (tx_irq < 0 || rx_irq < 0 || err_irq < 0)
+		return -ENODEV;
+
+	clk = clk_get(&pdev->dev, "fpi");
+	if (IS_ERR(clk)) {
+		pr_err("failed to get fpi clk\n");
+		return -ENOENT;
+	}
+
+	ltq_port = kzalloc(sizeof(struct ltq_uart_port), GFP_KERNEL);
+	if (!ltq_port) {
+		clk_put(clk);
+		return -ENOMEM;
+	}
+	port = &ltq_port->port;
+	port->iotype	= SERIAL_IO_MEM;
+	port->flags	= ASYNC_BOOT_AUTOCONF | UPF_IOREMAP;
+	port->ops	= &lqasc_pops;
+	port->fifosize	= 16;
+	port->type	= PORT_LTQ_ASC;
+	port->line	= pdev->id;
+	port->dev	= &pdev->dev;
+	port->irq	= tx_irq; /* unused, just to be backward-compatible */
+	port->mapbase	= mmres->start;
+	ltq_port->clk	= clk;
+	ltq_port->tx_irq = tx_irq;
+	ltq_port->rx_irq = rx_irq;
+	ltq_port->err_irq = err_irq;
+	lqasc_port[pdev->id] = ltq_port; /* console setup/write look it up here */
+	platform_set_drvdata(pdev, ltq_port);
+	ret = uart_add_one_port(&lqasc_reg, port);
+	if (ret) {	/* undo everything so that nothing leaks or dangles */
+		platform_set_drvdata(pdev, NULL);
+		lqasc_port[pdev->id] = NULL;
+		clk_put(clk);
+		kfree(ltq_port);
+	}
+	return ret;
+}
+
+static struct platform_driver lqasc_driver = {
+	.driver		= {	/* no .probe: init_lqasc() hands lqasc_probe to platform_driver_probe() */
+		.name	= DRVNAME,
+		.owner	= THIS_MODULE,
+	},
+};
+
+/*
+ * Module entry point: register the uart driver first, then probe platform
+ * devices; the uart driver is unregistered again if probing fails.
+ */
+int __init
+init_lqasc(void)
+{
+	int ret = uart_register_driver(&lqasc_reg);
+	if (ret)
+		return ret;
+	ret = platform_driver_probe(&lqasc_driver, lqasc_probe);
+	if (ret)
+		uart_unregister_driver(&lqasc_reg);
+	return ret;
+}
+
+module_init(init_lqasc);
+
+MODULE_DESCRIPTION("Lantiq serial port driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c
index 0e8eec516df4..c911b2419abb 100644
--- a/drivers/tty/serial/of_serial.c
+++ b/drivers/tty/serial/of_serial.c
@@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev,
 /*
  * Try to register a serial port
  */
+static struct of_device_id of_platform_serial_table[];
 static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct of_serial_info *info;
 	struct uart_port port;
 	int port_type;
 	int ret;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(of_platform_serial_table, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL))
@@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
 	if (info == NULL)
 		return -ENOMEM;
 
-	port_type = (unsigned long)ofdev->dev.of_match->data;
+	port_type = (unsigned long)match->data;
 	ret = of_platform_serial_setup(ofdev, port_type, &port);
 	if (ret)
 		goto out;
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 51fe1795d5a8..d2efe823c20d 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -381,7 +381,13 @@ static int uio_get_minor(struct uio_device *idev)
 			retval = -ENOMEM;
 		goto exit;
 	}
-	idev->minor = id & MAX_ID_MASK;
+	if (id < UIO_MAX_DEVICES) {
+		idev->minor = id;
+	} else {
+		dev_err(idev->dev, "too many uio devices\n");
+		retval = -EINVAL;
+		idr_remove(&uio_idr, id);
+	}
 exit:
 	mutex_unlock(&minor_lock);
 	return retval;
@@ -587,14 +593,12 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 
 static int uio_find_mem_index(struct vm_area_struct *vma)
 {
-	int mi;
 	struct uio_device *idev = vma->vm_private_data;
 
-	for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
-		if (idev->info->mem[mi].size == 0)
+	if (vma->vm_pgoff < MAX_UIO_MAPS) {
+		if (idev->info->mem[vma->vm_pgoff].size == 0)
 			return -1;
-		if (vma->vm_pgoff == mi)
-			return mi;
+		return (int)vma->vm_pgoff;
 	}
 	return -1;
 }
diff --git a/drivers/uio/uio_netx.c b/drivers/uio/uio_netx.c
index 5ffdb483b015..a879fd5741f8 100644
--- a/drivers/uio/uio_netx.c
+++ b/drivers/uio/uio_netx.c
@@ -18,6 +18,9 @@
 
 #define PCI_VENDOR_ID_HILSCHER		0x15CF
 #define PCI_DEVICE_ID_HILSCHER_NETX	0x0000
+#define PCI_DEVICE_ID_HILSCHER_NETPLC	0x0010
+#define PCI_SUBDEVICE_ID_NETPLC_RAM	0x0000
+#define PCI_SUBDEVICE_ID_NETPLC_FLASH	0x0001
 #define PCI_SUBDEVICE_ID_NXSB_PCA	0x3235
 #define PCI_SUBDEVICE_ID_NXPCA		0x3335
 
@@ -66,6 +69,10 @@ static int __devinit netx_pci_probe(struct pci_dev *dev,
 		bar = 0;
 		info->name = "netx";
 		break;
+	case PCI_DEVICE_ID_HILSCHER_NETPLC:
+		bar = 0;
+		info->name = "netplc";
+		break;
 	default:
 		bar = 2;
 		info->name = "netx_plx";
@@ -134,6 +141,18 @@ static struct pci_device_id netx_pci_ids[] = {
 		.subdevice =	0,
 	},
 	{
+		.vendor =       PCI_VENDOR_ID_HILSCHER,
+		.device =       PCI_DEVICE_ID_HILSCHER_NETPLC,
+		.subvendor =    PCI_VENDOR_ID_HILSCHER,
+		.subdevice =    PCI_SUBDEVICE_ID_NETPLC_RAM,
+	},
+	{
+		.vendor =       PCI_VENDOR_ID_HILSCHER,
+		.device =       PCI_DEVICE_ID_HILSCHER_NETPLC,
+		.subvendor =    PCI_VENDOR_ID_HILSCHER,
+		.subdevice =    PCI_SUBDEVICE_ID_NETPLC_FLASH,
+	},
+	{
 		.vendor =	PCI_VENDOR_ID_PLX,
 		.device =	PCI_DEVICE_ID_PLX_9030,
 		.subvendor =	PCI_VENDOR_ID_PLX,
diff --git a/drivers/uio/uio_pdrv_genirq.c b/drivers/uio/uio_pdrv_genirq.c
index 7174d518b8a6..0f424af7f109 100644
--- a/drivers/uio/uio_pdrv_genirq.c
+++ b/drivers/uio/uio_pdrv_genirq.c
@@ -189,6 +189,10 @@ static int uio_pdrv_genirq_remove(struct platform_device *pdev)
 
 	uio_unregister_device(priv->uioinfo);
 	pm_runtime_disable(&pdev->dev);
+
+	priv->uioinfo->handler = NULL;
+	priv->uioinfo->irqcontrol = NULL;
+
 	kfree(priv);
 	return 0;
 }
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 41b6e51188e4..006489d82dc3 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -66,6 +66,7 @@ config USB_ARCH_HAS_EHCI
 	default y if ARCH_VT8500
 	default y if PLAT_SPEAR
 	default y if ARCH_MSM
+	default y if MICROBLAZE
 	default PCI
 
 # ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface.
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index a3d2e2399655..96fdfb815f89 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -221,7 +221,7 @@ static char *usb_dump_endpoint_descriptor(int speed, char *start, char *end,
 		break;
 	case USB_ENDPOINT_XFER_INT:
 		type = "Int.";
-		if (speed == USB_SPEED_HIGH)
+		if (speed == USB_SPEED_HIGH || speed == USB_SPEED_SUPER)
 			interval = 1 << (desc->bInterval - 1);
 		else
 			interval = desc->bInterval;
@@ -229,7 +229,8 @@ static char *usb_dump_endpoint_descriptor(int speed, char *start, char *end,
 	default:	/* "can't happen" */
 		return start;
 	}
-	interval *= (speed == USB_SPEED_HIGH) ? 125 : 1000;
+	interval *= (speed == USB_SPEED_HIGH ||
+		     speed == USB_SPEED_SUPER) ? 125 : 1000;
 	if (interval % 1000)
 		unit = 'u';
 	else {
@@ -542,8 +543,9 @@ static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes,
 	if (level == 0) {
 		int	max;
 
-		/* high speed reserves 80%, full/low reserves 90% */
-		if (usbdev->speed == USB_SPEED_HIGH)
+		/* super/high speed reserves 80%, full/low reserves 90% */
+		if (usbdev->speed == USB_SPEED_HIGH ||
+		    usbdev->speed == USB_SPEED_SUPER)
 			max = 800;
 		else
 			max = FRAME_TIME_MAX_USECS_ALLOC;
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 8eed05d23838..77a7faec8d78 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1908,7 +1908,7 @@ void usb_free_streams(struct usb_interface *interface,
 
 	/* Streams only apply to bulk endpoints. */
 	for (i = 0; i < num_eps; i++)
-		if (!usb_endpoint_xfer_bulk(&eps[i]->desc))
+		if (!eps[i] || !usb_endpoint_xfer_bulk(&eps[i]->desc))
 			return;
 
 	hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 8fb754916c67..93720bdc9efd 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2285,7 +2285,17 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 	}
 
 	/* see 7.1.7.6 */
-	status = set_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND);
+	/* Clear PORT_POWER if it's a USB3.0 device connected to USB 3.0
+	 * external hub.
+	 * FIXME: this is a temporary workaround to make the system able
+	 * to suspend/resume.
+	 */
+	if ((hub->hdev->parent != NULL) && hub_is_superspeed(hub->hdev))
+		status = clear_port_feature(hub->hdev, port1,
+						USB_PORT_FEAT_POWER);
+	else
+		status = set_port_feature(hub->hdev, port1,
+						USB_PORT_FEAT_SUSPEND);
 	if (status) {
 		dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n",
 				port1, status);
diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index 9abecfddb27d..0111f8a9cf7f 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -706,6 +706,7 @@ f_audio_unbind(struct usb_configuration *c, struct usb_function *f)
 	struct f_audio		*audio = func_to_audio(f);
 
 	usb_free_descriptors(f->descriptors);
+	usb_free_descriptors(f->hs_descriptors);
 	kfree(audio);
 }
 
diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
index 95dd4662d6a8..b3c304290150 100644
--- a/drivers/usb/gadget/f_eem.c
+++ b/drivers/usb/gadget/f_eem.c
@@ -314,6 +314,9 @@ eem_unbind(struct usb_configuration *c, struct usb_function *f)
 
 static void eem_cmd_complete(struct usb_ep *ep, struct usb_request *req)
 {
+	struct sk_buff *skb = (struct sk_buff *)req->context;
+
+	dev_kfree_skb_any(skb);
 }
 
 /*
@@ -428,10 +431,11 @@ static int eem_unwrap(struct gether *port,
 				skb_trim(skb2, len);
 				put_unaligned_le16(BIT(15) | BIT(11) | len,
 							skb_push(skb2, 2));
-				skb_copy_bits(skb, 0, req->buf, skb->len);
-				req->length = skb->len;
+				skb_copy_bits(skb2, 0, req->buf, skb2->len);
+				req->length = skb2->len;
 				req->complete = eem_cmd_complete;
 				req->zero = 1;
+				req->context = skb2;
 				if (usb_ep_queue(port->in_ep, req, GFP_ATOMIC))
 					DBG(cdev, "echo response queue fail\n");
 				break;
diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c
index aee7e3c53c38..3a68e09309f7 100644
--- a/drivers/usb/gadget/fsl_qe_udc.c
+++ b/drivers/usb/gadget/fsl_qe_udc.c
@@ -1148,6 +1148,12 @@ static int qe_ep_tx(struct qe_ep *ep, struct qe_frame *frame)
 static int txcomplete(struct qe_ep *ep, unsigned char restart)
 {
 	if (ep->tx_req != NULL) {
+		struct qe_req *req = ep->tx_req;
+		unsigned zlp = 0, last_len = 0;
+
+		last_len = min_t(unsigned, req->req.length - ep->sent,
+				ep->ep.maxpacket);
+
 		if (!restart) {
 			int asent = ep->last;
 			ep->sent += asent;
@@ -1156,9 +1162,18 @@ static int txcomplete(struct qe_ep *ep, unsigned char restart)
 			ep->last = 0;
 		}
 
+		/* zlp needed when req->req.zero is set */
+		if (req->req.zero) {
+			if (last_len == 0 ||
+				(req->req.length % ep->ep.maxpacket) != 0)
+				zlp = 0;
+			else
+				zlp = 1;
+		} else
+			zlp = 0;
+
 		/* a request already were transmitted completely */
-		if ((ep->tx_req->req.length - ep->sent) <= 0) {
-			ep->tx_req->req.actual = (unsigned int)ep->sent;
+		if (((ep->tx_req->req.length - ep->sent) <= 0) && !zlp) {
 			done(ep, ep->tx_req, 0);
 			ep->tx_req = NULL;
 			ep->last = 0;
@@ -1191,6 +1206,7 @@ static int qe_usb_senddata(struct qe_ep *ep, struct qe_frame *frame)
 	buf = (u8 *)ep->tx_req->req.buf + ep->sent;
 	if (buf && size) {
 		ep->last = size;
+		ep->tx_req->req.actual += size;
 		frame_set_data(frame, buf);
 		frame_set_length(frame, size);
 		frame_set_status(frame, FRAME_OK);
@@ -2523,15 +2539,18 @@ static void qe_udc_release(struct device *dev)
 }
 
 /* Driver probe functions */
+static const struct of_device_id qe_udc_match[];
 static int __devinit qe_udc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct qe_ep *ep;
 	unsigned int ret = 0;
 	unsigned int i;
 	const void *prop;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(qe_udc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	prop = of_get_property(np, "mode", NULL);
@@ -2545,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev)
 		return -ENOMEM;
 	}
 
-	udc_controller->soc_type = (unsigned long)ofdev->dev.of_match->data;
+	udc_controller->soc_type = (unsigned long)match->data;
 	udc_controller->usb_regs = of_iomap(np, 0);
 	if (!udc_controller->usb_regs) {
 		ret = -ENOMEM;
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48a760220baf..bf6e11c758d5 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -38,6 +38,7 @@
 #include <linux/device.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
diff --git a/drivers/usb/gadget/imx_udc.c b/drivers/usb/gadget/imx_udc.c
index 5408186afc35..ade40066decf 100644
--- a/drivers/usb/gadget/imx_udc.c
+++ b/drivers/usb/gadget/imx_udc.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 3ed73f49cf18..a01383f71f38 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -386,8 +386,10 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 
 	/* halt any endpoint by doing a "wrong direction" i/o call */
 	if (usb_endpoint_dir_in(&data->desc)) {
-		if (usb_endpoint_xfer_isoc(&data->desc))
+		if (usb_endpoint_xfer_isoc(&data->desc)) {
+			mutex_unlock(&data->lock);
 			return -EINVAL;
+		}
 		DBG (data->dev, "%s halt\n", data->name);
 		spin_lock_irq (&data->dev->lock);
 		if (likely (data->ep != NULL))
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index cb5cd422f3f5..82fd24935332 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -44,6 +44,7 @@
 #include <linux/usb/otg.h>
 #include <linux/dma-mapping.h>
 #include <linux/clk.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c
index 3e4b35e50c24..68dbcc3e4cc2 100644
--- a/drivers/usb/gadget/pch_udc.c
+++ b/drivers/usb/gadget/pch_udc.c
@@ -1608,7 +1608,7 @@ static int pch_udc_pcd_queue(struct usb_ep *usbep, struct usb_request *usbreq,
 		return -EINVAL;
 	if (!dev->driver || (dev->gadget.speed == USB_SPEED_UNKNOWN))
 		return -ESHUTDOWN;
-	spin_lock_irqsave(&ep->dev->lock, iflags);
+	spin_lock_irqsave(&dev->lock, iflags);
 	/* map the buffer for dma */
 	if (usbreq->length &&
 	    ((usbreq->dma == DMA_ADDR_INVALID) || !usbreq->dma)) {
@@ -1625,8 +1625,10 @@ static int pch_udc_pcd_queue(struct usb_ep *usbep, struct usb_request *usbreq,
 							     DMA_FROM_DEVICE);
 		} else {
 			req->buf = kzalloc(usbreq->length, GFP_ATOMIC);
-			if (!req->buf)
-				return -ENOMEM;
+			if (!req->buf) {
+				retval = -ENOMEM;
+				goto probe_end;
+			}
 			if (ep->in) {
 				memcpy(req->buf, usbreq->buf, usbreq->length);
 				req->dma = dma_map_single(&dev->pdev->dev,
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 444b60aa15e9..365c02fc25fc 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -46,6 +46,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/io.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/dma.h>
diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c
index 78a39a41547d..57607696735c 100644
--- a/drivers/usb/gadget/pxa27x_udc.c
+++ b/drivers/usb/gadget/pxa27x_udc.c
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <mach/hardware.h>
diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c
index 015118535f77..6dcc1f68fa60 100644
--- a/drivers/usb/gadget/r8a66597-udc.c
+++ b/drivers/usb/gadget/r8a66597-udc.c
@@ -1083,7 +1083,9 @@ static void irq_device_state(struct r8a66597 *r8a66597)
 
 	if (dvsq == DS_DFLT) {
 		/* bus reset */
+		spin_unlock(&r8a66597->lock);
 		r8a66597->driver->disconnect(&r8a66597->gadget);
+		spin_lock(&r8a66597->lock);
 		r8a66597_update_usb_speed(r8a66597);
 	}
 	if (r8a66597->old_dvsq == DS_CNFG && dvsq != DS_CNFG)
diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c
index 7e41a95c5ceb..627f3a678759 100644
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -40,6 +40,7 @@
 #include <linux/slab.h>
 #include <linux/usb/ulpi.h>
 #include <plat/usb.h>
+#include <linux/regulator/consumer.h>
 
 /* EHCI Register Set */
 #define EHCI_INSNREG04					(0xA0)
@@ -118,6 +119,8 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev)
 	struct ehci_hcd				*omap_ehci;
 	int					ret = -ENODEV;
 	int					irq;
+	int					i;
+	char					supply[7];
 
 	if (usb_disabled())
 		return -ENODEV;
@@ -158,6 +161,23 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev)
 	hcd->rsrc_len = resource_size(res);
 	hcd->regs = regs;
 
+	/* get ehci regulator and enable */
+	for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) {
+		if (pdata->port_mode[i] != OMAP_EHCI_PORT_MODE_PHY) {
+			pdata->regulator[i] = NULL;
+			continue;
+		}
+		snprintf(supply, sizeof(supply), "hsusb%d", i);
+		pdata->regulator[i] = regulator_get(dev, supply);
+		if (IS_ERR(pdata->regulator[i])) {
+			pdata->regulator[i] = NULL;
+			dev_dbg(dev,
+			"failed to get ehci port%d regulator\n", i);
+		} else {
+			regulator_enable(pdata->regulator[i]);
+		}
+	}
+
 	ret = omap_usbhs_enable(dev);
 	if (ret) {
 		dev_err(dev, "failed to start usbhs with err %d\n", ret);
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 98ded66e8d3f..42abd0f603bf 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -1247,24 +1247,27 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 static void scan_async (struct ehci_hcd *ehci)
 {
+	bool			stopped;
 	struct ehci_qh		*qh;
 	enum ehci_timer_action	action = TIMER_IO_WATCHDOG;
 
 	ehci->stamp = ehci_readl(ehci, &ehci->regs->frame_index);
 	timer_action_done (ehci, TIMER_ASYNC_SHRINK);
 rescan:
+	stopped = !HC_IS_RUNNING(ehci_to_hcd(ehci)->state);
 	qh = ehci->async->qh_next.qh;
 	if (likely (qh != NULL)) {
 		do {
 			/* clean any finished work for this qh */
-			if (!list_empty (&qh->qtd_list)
-					&& qh->stamp != ehci->stamp) {
+			if (!list_empty(&qh->qtd_list) && (stopped ||
+					qh->stamp != ehci->stamp)) {
 				int temp;
 
 				/* unlinks could happen here; completion
 				 * reporting drops the lock.  rescan using
 				 * the latest schedule, but don't rescan
-				 * qhs we already finished (no looping).
+				 * qhs we already finished (no looping)
+				 * unless the controller is stopped.
 				 */
 				qh = qh_get (qh);
 				qh->stamp = ehci->stamp;
@@ -1285,9 +1288,9 @@ rescan:
 			 */
 			if (list_empty(&qh->qtd_list)
 					&& qh->qh_state == QH_STATE_LINKED) {
-				if (!ehci->reclaim
-					&& ((ehci->stamp - qh->stamp) & 0x1fff)
-						>= (EHCI_SHRINK_FRAMES * 8))
+				if (!ehci->reclaim && (stopped ||
+					((ehci->stamp - qh->stamp) & 0x1fff)
+						>= EHCI_SHRINK_FRAMES * 8))
 					start_unlink_async(ehci, qh);
 				else
 					action = TIMER_ASYNC_SHRINK;
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index f97570a847ca..9c37dad3e816 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -81,6 +81,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 #include <linux/bitmap.h>
+#include <linux/prefetch.h>
 
 #include <asm/irq.h>
 #include <asm/system.h>
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index f50e84ac570a..7b2e69aa2e98 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -295,7 +295,7 @@ static void alloc_mem(struct usb_hcd *hcd, struct isp1760_qtd *qtd)
 	}
 
 	dev_err(hcd->self.controller,
-				"%s: Can not allocate %lu bytes of memory\n"
+				"%s: Cannot allocate %zu bytes of memory\n"
 				"Current memory map:\n",
 				__func__, qtd->length);
 	for (i = 0; i < BLOCKS; i++) {
@@ -1633,6 +1633,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 			ints[i].qh = NULL;
 			ints[i].qtd = NULL;
 
+			urb->status = status;
 			isp1760_urb_done(hcd, urb);
 			if (qtd)
 				pe(hcd, qh, qtd);
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index 17a6043c1fa0..958d985f2951 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -33,7 +33,7 @@
 
 #ifdef __LITTLE_ENDIAN
 #define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C)
-#elif __BIG_ENDIAN
+#elif defined(__BIG_ENDIAN)
 #define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C | \
 			  USBH_ENABLE_BE)
 #else
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 1d586d4f7b56..9b166d70ae91 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -84,65 +84,92 @@ int usb_amd_find_chipset_info(void)
 {
 	u8 rev = 0;
 	unsigned long flags;
+	struct amd_chipset_info info;
+	int ret;
 
 	spin_lock_irqsave(&amd_lock, flags);
 
-	amd_chipset.probe_count++;
 	/* probe only once */
-	if (amd_chipset.probe_count > 1) {
+	if (amd_chipset.probe_count > 0) {
+		amd_chipset.probe_count++;
 		spin_unlock_irqrestore(&amd_lock, flags);
 		return amd_chipset.probe_result;
 	}
+	memset(&info, 0, sizeof(info));
+	spin_unlock_irqrestore(&amd_lock, flags);
 
-	amd_chipset.smbus_dev = pci_get_device(PCI_VENDOR_ID_ATI, 0x4385, NULL);
-	if (amd_chipset.smbus_dev) {
-		rev = amd_chipset.smbus_dev->revision;
+	info.smbus_dev = pci_get_device(PCI_VENDOR_ID_ATI, 0x4385, NULL);
+	if (info.smbus_dev) {
+		rev = info.smbus_dev->revision;
 		if (rev >= 0x40)
-			amd_chipset.sb_type = 1;
+			info.sb_type = 1;
 		else if (rev >= 0x30 && rev <= 0x3b)
-			amd_chipset.sb_type = 3;
+			info.sb_type = 3;
 	} else {
-		amd_chipset.smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-							0x780b, NULL);
-		if (!amd_chipset.smbus_dev) {
-			spin_unlock_irqrestore(&amd_lock, flags);
-			return 0;
+		info.smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+						0x780b, NULL);
+		if (!info.smbus_dev) {
+			ret = 0;
+			goto commit;
 		}
-		rev = amd_chipset.smbus_dev->revision;
+
+		rev = info.smbus_dev->revision;
 		if (rev >= 0x11 && rev <= 0x18)
-			amd_chipset.sb_type = 2;
+			info.sb_type = 2;
 	}
 
-	if (amd_chipset.sb_type == 0) {
-		if (amd_chipset.smbus_dev) {
-			pci_dev_put(amd_chipset.smbus_dev);
-			amd_chipset.smbus_dev = NULL;
+	if (info.sb_type == 0) {
+		if (info.smbus_dev) {
+			pci_dev_put(info.smbus_dev);
+			info.smbus_dev = NULL;
 		}
-		spin_unlock_irqrestore(&amd_lock, flags);
-		return 0;
+		ret = 0;
+		goto commit;
 	}
 
-	amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x9601, NULL);
-	if (amd_chipset.nb_dev) {
-		amd_chipset.nb_type = 1;
+	info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x9601, NULL);
+	if (info.nb_dev) {
+		info.nb_type = 1;
 	} else {
-		amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-							0x1510, NULL);
-		if (amd_chipset.nb_dev) {
-			amd_chipset.nb_type = 2;
-		} else  {
-			amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-								0x9600, NULL);
-			if (amd_chipset.nb_dev)
-				amd_chipset.nb_type = 3;
+		info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1510, NULL);
+		if (info.nb_dev) {
+			info.nb_type = 2;
+		} else {
+			info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+						     0x9600, NULL);
+			if (info.nb_dev)
+				info.nb_type = 3;
 		}
 	}
 
-	amd_chipset.probe_result = 1;
+	ret = info.probe_result = 1;
 	printk(KERN_DEBUG "QUIRK: Enable AMD PLL fix\n");
 
-	spin_unlock_irqrestore(&amd_lock, flags);
-	return amd_chipset.probe_result;
+commit:
+
+	spin_lock_irqsave(&amd_lock, flags);
+	if (amd_chipset.probe_count > 0) {
+		/* race - someone else was faster - drop devices */
+
+		/* Mark that we were here */
+		amd_chipset.probe_count++;
+		ret = amd_chipset.probe_result;
+
+		spin_unlock_irqrestore(&amd_lock, flags);
+
+		if (info.nb_dev)
+			pci_dev_put(info.nb_dev);
+		if (info.smbus_dev)
+			pci_dev_put(info.smbus_dev);
+
+	} else {
+		/* no race - commit the result */
+		info.probe_count++;
+		amd_chipset = info;
+		spin_unlock_irqrestore(&amd_lock, flags);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(usb_amd_find_chipset_info);
 
@@ -284,6 +311,7 @@ EXPORT_SYMBOL_GPL(usb_amd_quirk_pll_enable);
 
 void usb_amd_dev_put(void)
 {
+	struct pci_dev *nb, *smbus;
 	unsigned long flags;
 
 	spin_lock_irqsave(&amd_lock, flags);
@@ -294,20 +322,23 @@ void usb_amd_dev_put(void)
 		return;
 	}
 
-	if (amd_chipset.nb_dev) {
-		pci_dev_put(amd_chipset.nb_dev);
-		amd_chipset.nb_dev = NULL;
-	}
-	if (amd_chipset.smbus_dev) {
-		pci_dev_put(amd_chipset.smbus_dev);
-		amd_chipset.smbus_dev = NULL;
-	}
+	/* save them to pci_dev_put outside of spinlock */
+	nb    = amd_chipset.nb_dev;
+	smbus = amd_chipset.smbus_dev;
+
+	amd_chipset.nb_dev = NULL;
+	amd_chipset.smbus_dev = NULL;
 	amd_chipset.nb_type = 0;
 	amd_chipset.sb_type = 0;
 	amd_chipset.isoc_reqs = 0;
 	amd_chipset.probe_result = 0;
 
 	spin_unlock_irqrestore(&amd_lock, flags);
+
+	if (nb)
+		pci_dev_put(nb);
+	if (smbus)
+		pci_dev_put(smbus);
 }
 EXPORT_SYMBOL_GPL(usb_amd_dev_put);
 
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 18b7099a8125..fafccc2fd331 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -47,6 +47,7 @@
 #include <linux/usb/sl811.h>
 #include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
+#include <linux/prefetch.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index a78f2ebd11b7..73f75d26436c 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -777,7 +777,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
 		if (t1 != t2)
 			xhci_writel(xhci, t2, port_array[port_index]);
 
-		if (DEV_HIGHSPEED(t1)) {
+		if (hcd->speed != HCD_USB3) {
 			/* enable remote wake up for USB 2.0 */
 			u32 __iomem *addr;
 			u32 tmp;
@@ -866,6 +866,21 @@ int xhci_bus_resume(struct usb_hcd *hcd)
 				temp |= PORT_LINK_STROBE | XDEV_U0;
 				xhci_writel(xhci, temp, port_array[port_index]);
 			}
+			/* wait for the port to enter U0 and report port link
+			 * state change.
+			 */
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			msleep(20);
+			spin_lock_irqsave(&xhci->lock, flags);
+
+			/* Clear PLC */
+			temp = xhci_readl(xhci, port_array[port_index]);
+			if (temp & PORT_PLC) {
+				temp = xhci_port_state_to_neutral(temp);
+				temp |= PORT_PLC;
+				xhci_writel(xhci, temp, port_array[port_index]);
+			}
+
 			slot_id = xhci_find_slot_id_by_port(hcd,
 					xhci, port_index + 1);
 			if (slot_id)
@@ -873,7 +888,7 @@ int xhci_bus_resume(struct usb_hcd *hcd)
 		} else
 			xhci_writel(xhci, temp, port_array[port_index]);
 
-		if (DEV_HIGHSPEED(temp)) {
+		if (hcd->speed != HCD_USB3) {
 			/* disable remote wake up for USB 2.0 */
 			u32 __iomem *addr;
 			u32 tmp;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index a003e79aacdc..627f3438028c 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -846,7 +846,7 @@ static u32 xhci_find_real_port_number(struct xhci_hcd *xhci,
 		 * Skip ports that don't have known speeds, or have duplicate
 		 * Extended Capabilities port speed entries.
 		 */
-		if (port_speed == 0 || port_speed == -1)
+		if (port_speed == 0 || port_speed == DUPLICATE_ENTRY)
 			continue;
 
 		/*
@@ -974,6 +974,47 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	return 0;
 }
 
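+/*
+ * Convert an interval encoded as 2^(bInterval - 1) into the plain exponent
+ * n such that 2^n == interval.  bInterval is clamped to the range 1..16
+ * first, and a warning is logged if the clamping changed the value.
+ */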
+static unsigned int xhci_parse_exponent_interval(struct usb_device *udev,
+		struct usb_host_endpoint *ep)
+{
+	unsigned int interval;
+
+	interval = clamp_val(ep->desc.bInterval, 1, 16) - 1;
+	if (interval != ep->desc.bInterval - 1)
+		dev_warn(&udev->dev,
+			 "ep %#x - rounding interval to %d microframes\n",
+			 ep->desc.bEndpointAddress,
+			 1 << interval);
+
+	return interval;
+}
+
+/*
+ * Convert bInterval expressed in frames (in 1-255 range) to an exponent of
+ * microframes, rounded down to a power of 2 and clamped to 2^3..2^10.
+ */
+static unsigned int xhci_parse_frame_interval(struct usb_device *udev,
+		struct usb_host_endpoint *ep)
+{
+	unsigned int interval;
+
+	interval = fls(8 * ep->desc.bInterval) - 1;
+	interval = clamp_val(interval, 3, 10);
+	if ((1 << interval) != 8 * ep->desc.bInterval)
+		dev_warn(&udev->dev,
+			 "ep %#x - rounding interval to %d microframes, ep desc says %d microframes\n",
+			 ep->desc.bEndpointAddress,
+			 1 << interval,
+			 8 * ep->desc.bInterval);
+
+	return interval;
+}
+
 /* Return the polling or NAK interval.
  *
  * The polling interval is expressed in "microframes".  If xHCI's Interval field
@@ -982,7 +1023,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
  * The NAK interval is one NAK per 1 to 255 microframes, or no NAKs if interval
  * is set to 0.
  */
-static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev,
+static unsigned int xhci_get_endpoint_interval(struct usb_device *udev,
 		struct usb_host_endpoint *ep)
 {
 	unsigned int interval = 0;
@@ -991,45 +1032,38 @@ static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev,
 	case USB_SPEED_HIGH:
 		/* Max NAK rate */
 		if (usb_endpoint_xfer_control(&ep->desc) ||
-				usb_endpoint_xfer_bulk(&ep->desc))
+		    usb_endpoint_xfer_bulk(&ep->desc)) {
 			interval = ep->desc.bInterval;
+			break;
+		}
 		/* Fall through - SS and HS isoc/int have same decoding */
+
 	case USB_SPEED_SUPER:
 		if (usb_endpoint_xfer_int(&ep->desc) ||
-				usb_endpoint_xfer_isoc(&ep->desc)) {
-			if (ep->desc.bInterval == 0)
-				interval = 0;
-			else
-				interval = ep->desc.bInterval - 1;
-			if (interval > 15)
-				interval = 15;
-			if (interval != ep->desc.bInterval + 1)
-				dev_warn(&udev->dev, "ep %#x - rounding interval to %d microframes\n",
-						ep->desc.bEndpointAddress, 1 << interval);
+		    usb_endpoint_xfer_isoc(&ep->desc)) {
+			interval = xhci_parse_exponent_interval(udev, ep);
 		}
 		break;
-	/* Convert bInterval (in 1-255 frames) to microframes and round down to
-	 * nearest power of 2.
-	 */
+
 	case USB_SPEED_FULL:
+		if (usb_endpoint_xfer_int(&ep->desc)) {
+			interval = xhci_parse_exponent_interval(udev, ep);
+			break;
+		}
+		/*
+		 * Fall through for isochronous endpoint interval decoding
+		 * since it uses the same rules as low speed interrupt
+		 * endpoints.
+		 */
+
 	case USB_SPEED_LOW:
 		if (usb_endpoint_xfer_int(&ep->desc) ||
-				usb_endpoint_xfer_isoc(&ep->desc)) {
-			interval = fls(8*ep->desc.bInterval) - 1;
-			if (interval > 10)
-				interval = 10;
-			if (interval < 3)
-				interval = 3;
-			if ((1 << interval) != 8*ep->desc.bInterval)
-				dev_warn(&udev->dev,
-						"ep %#x - rounding interval"
-						" to %d microframes, "
-						"ep desc says %d microframes\n",
-						ep->desc.bEndpointAddress,
-						1 << interval,
-						8*ep->desc.bInterval);
+		    usb_endpoint_xfer_isoc(&ep->desc)) {
+
+			interval = xhci_parse_frame_interval(udev, ep);
 		}
 		break;
+
 	default:
 		BUG();
 	}
@@ -1041,7 +1075,7 @@ static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev,
  * transaction opportunities per microframe", but that goes in the Max Burst
  * endpoint context field.
  */
-static inline u32 xhci_get_endpoint_mult(struct usb_device *udev,
+static u32 xhci_get_endpoint_mult(struct usb_device *udev,
 		struct usb_host_endpoint *ep)
 {
 	if (udev->speed != USB_SPEED_SUPER ||
@@ -1050,7 +1084,7 @@ static inline u32 xhci_get_endpoint_mult(struct usb_device *udev,
 	return ep->ss_ep_comp.bmAttributes;
 }
 
-static inline u32 xhci_get_endpoint_type(struct usb_device *udev,
+static u32 xhci_get_endpoint_type(struct usb_device *udev,
 		struct usb_host_endpoint *ep)
 {
 	int in;
@@ -1084,7 +1118,7 @@ static inline u32 xhci_get_endpoint_type(struct usb_device *udev,
  * Basically, this is the maxpacket size, multiplied by the burst size
  * and mult size.
  */
-static inline u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci,
+static u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci,
 		struct usb_device *udev,
 		struct usb_host_endpoint *ep)
 {
@@ -1727,12 +1761,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 			 * found a similar duplicate.
 			 */
 			if (xhci->port_array[i] != major_revision &&
-				xhci->port_array[i] != (u8) -1) {
+				xhci->port_array[i] != DUPLICATE_ENTRY) {
 				if (xhci->port_array[i] == 0x03)
 					xhci->num_usb3_ports--;
 				else
 					xhci->num_usb2_ports--;
-				xhci->port_array[i] = (u8) -1;
+				xhci->port_array[i] = DUPLICATE_ENTRY;
 			}
 			/* FIXME: Should we disable the port? */
 			continue;
@@ -1831,7 +1865,7 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags)
 		for (i = 0; i < num_ports; i++) {
 			if (xhci->port_array[i] == 0x03 ||
 					xhci->port_array[i] == 0 ||
-					xhci->port_array[i] == -1)
+					xhci->port_array[i] == DUPLICATE_ENTRY)
 				continue;
 
 			xhci->usb2_ports[port_index] =
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index ceea9f33491c..a10494c2f3c7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -114,6 +114,10 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
 	if (pdev->vendor == PCI_VENDOR_ID_NEC)
 		xhci->quirks |= XHCI_NEC_HOST;
 
+	/* AMD PLL quirk */
+	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
+		xhci->quirks |= XHCI_AMD_PLL_FIX;
+
 	/* Make sure the HC is halted. */
 	retval = xhci_halt(xhci);
 	if (retval)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index cfc1ad92473f..7437386a9a50 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -93,7 +93,7 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg,
 /* Does this link TRB point to the first segment in a ring,
  * or was the previous TRB the last TRB on the last segment in the ERST?
  */
-static inline bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring *ring,
+static bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		struct xhci_segment *seg, union xhci_trb *trb)
 {
 	if (ring == xhci->event_ring)
@@ -107,7 +107,7 @@ static inline bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring
  * segment?  I.e. would the updated event TRB pointer step off the end of the
  * event seg?
  */
-static inline int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
+static int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		struct xhci_segment *seg, union xhci_trb *trb)
 {
 	if (ring == xhci->event_ring)
@@ -116,7 +116,7 @@ static inline int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		return (trb->link.control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK);
 }
 
-static inline int enqueue_is_link_trb(struct xhci_ring *ring)
+static int enqueue_is_link_trb(struct xhci_ring *ring)
 {
 	struct xhci_link_trb *link = &ring->enqueue->link;
 	return ((link->control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK));
@@ -592,7 +592,7 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 	ep->ep_state |= SET_DEQ_PENDING;
 }
 
-static inline void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci,
+static void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci,
 		struct xhci_virt_ep *ep)
 {
 	ep->ep_state &= ~EP_HALT_PENDING;
@@ -619,6 +619,13 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci,
 
 	/* Only giveback urb when this is the last td in urb */
 	if (urb_priv->td_cnt == urb_priv->length) {
+		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+			xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs--;
+			if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs	== 0) {
+				if (xhci->quirks & XHCI_AMD_PLL_FIX)
+					usb_amd_quirk_pll_enable();
+			}
+		}
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
 		xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, urb);
 
@@ -1209,7 +1216,7 @@ static unsigned int find_faked_portnum_from_hw_portnum(struct usb_hcd *hcd,
 		 * Skip ports that don't have known speeds, or have duplicate
 		 * Extended Capabilities port speed entries.
 		 */
-		if (port_speed == 0 || port_speed == -1)
+		if (port_speed == 0 || port_speed == DUPLICATE_ENTRY)
 			continue;
 
 		/*
@@ -1235,6 +1242,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
 	u8 major_revision;
 	struct xhci_bus_state *bus_state;
 	u32 __iomem **port_array;
+	bool bogus_port_status = false;
 
 	/* Port status change events always have a successful completion code */
 	if (GET_COMP_CODE(event->generic.field[2]) != COMP_SUCCESS) {
@@ -1247,6 +1255,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
 	max_ports = HCS_MAX_PORTS(xhci->hcs_params1);
 	if ((port_id <= 0) || (port_id > max_ports)) {
 		xhci_warn(xhci, "Invalid port id %d\n", port_id);
+		bogus_port_status = true;
 		goto cleanup;
 	}
 
@@ -1258,12 +1267,14 @@ static void handle_port_status(struct xhci_hcd *xhci,
 		xhci_warn(xhci, "Event for port %u not in "
 				"Extended Capabilities, ignoring.\n",
 				port_id);
+		bogus_port_status = true;
 		goto cleanup;
 	}
-	if (major_revision == (u8) -1) {
+	if (major_revision == DUPLICATE_ENTRY) {
 		xhci_warn(xhci, "Event for port %u duplicated in"
 				"Extended Capabilities, ignoring.\n",
 				port_id);
+		bogus_port_status = true;
 		goto cleanup;
 	}
 
@@ -1335,6 +1346,13 @@ cleanup:
 	/* Update event ring dequeue pointer before dropping the lock */
 	inc_deq(xhci, xhci->event_ring, true);
 
+	/* Don't make the USB core poll the roothub if we got a bad port status
+	 * change event.  Besides, at that point we can't tell which roothub
+	 * (USB 2.0 or USB 3.0) to kick.
+	 */
+	if (bogus_port_status)
+		return;
+
 	spin_unlock(&xhci->lock);
 	/* Pass this up to the core */
 	usb_hcd_poll_rh_status(hcd);
@@ -1554,8 +1572,17 @@ td_cleanup:
 
 		urb_priv->td_cnt++;
 		/* Giveback the urb when all the tds are completed */
-		if (urb_priv->td_cnt == urb_priv->length)
+		if (urb_priv->td_cnt == urb_priv->length) {
 			ret = 1;
+			if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+				xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs--;
+				if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs
+					== 0) {
+					if (xhci->quirks & XHCI_AMD_PLL_FIX)
+						usb_amd_quirk_pll_enable();
+				}
+			}
+		}
 	}
 
 	return ret;
@@ -1675,71 +1702,52 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
 	struct urb_priv *urb_priv;
 	int idx;
 	int len = 0;
-	int skip_td = 0;
 	union xhci_trb *cur_trb;
 	struct xhci_segment *cur_seg;
+	struct usb_iso_packet_descriptor *frame;
 	u32 trb_comp_code;
+	bool skip_td = false;
 
 	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
 	trb_comp_code = GET_COMP_CODE(event->transfer_len);
 	urb_priv = td->urb->hcpriv;
 	idx = urb_priv->td_cnt;
+	frame = &td->urb->iso_frame_desc[idx];
 
-	if (ep->skip) {
-		/* The transfer is partly done */
-		*status = -EXDEV;
-		td->urb->iso_frame_desc[idx].status = -EXDEV;
-	} else {
-		/* handle completion code */
-		switch (trb_comp_code) {
-		case COMP_SUCCESS:
-			td->urb->iso_frame_desc[idx].status = 0;
-			xhci_dbg(xhci, "Successful isoc transfer!\n");
-			break;
-		case COMP_SHORT_TX:
-			if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
-				td->urb->iso_frame_desc[idx].status =
-					 -EREMOTEIO;
-			else
-				td->urb->iso_frame_desc[idx].status = 0;
-			break;
-		case COMP_BW_OVER:
-			td->urb->iso_frame_desc[idx].status = -ECOMM;
-			skip_td = 1;
-			break;
-		case COMP_BUFF_OVER:
-		case COMP_BABBLE:
-			td->urb->iso_frame_desc[idx].status = -EOVERFLOW;
-			skip_td = 1;
-			break;
-		case COMP_STALL:
-			td->urb->iso_frame_desc[idx].status = -EPROTO;
-			skip_td = 1;
-			break;
-		case COMP_STOP:
-		case COMP_STOP_INVAL:
-			break;
-		default:
-			td->urb->iso_frame_desc[idx].status = -1;
-			break;
-		}
-	}
-
-	/* calc actual length */
-	if (ep->skip) {
-		td->urb->iso_frame_desc[idx].actual_length = 0;
-		/* Update ring dequeue pointer */
-		while (ep_ring->dequeue != td->last_trb)
-			inc_deq(xhci, ep_ring, false);
-		inc_deq(xhci, ep_ring, false);
-		return finish_td(xhci, td, event_trb, event, ep, status, true);
+	/* handle completion code */
+	switch (trb_comp_code) {
+	case COMP_SUCCESS:
+		frame->status = 0;
+		xhci_dbg(xhci, "Successful isoc transfer!\n");
+		break;
+	case COMP_SHORT_TX:
+		frame->status = td->urb->transfer_flags & URB_SHORT_NOT_OK ?
+				-EREMOTEIO : 0;
+		break;
+	case COMP_BW_OVER:
+		frame->status = -ECOMM;
+		skip_td = true;
+		break;
+	case COMP_BUFF_OVER:
+	case COMP_BABBLE:
+		frame->status = -EOVERFLOW;
+		skip_td = true;
+		break;
+	case COMP_STALL:
+		frame->status = -EPROTO;
+		skip_td = true;
+		break;
+	case COMP_STOP:
+	case COMP_STOP_INVAL:
+		break;
+	default:
+		frame->status = -1;
+		break;
 	}
 
-	if (trb_comp_code == COMP_SUCCESS || skip_td == 1) {
-		td->urb->iso_frame_desc[idx].actual_length =
-			td->urb->iso_frame_desc[idx].length;
-		td->urb->actual_length +=
-			td->urb->iso_frame_desc[idx].length;
+	if (trb_comp_code == COMP_SUCCESS || skip_td) {
+		frame->actual_length = frame->length;
+		td->urb->actual_length += frame->length;
 	} else {
 		for (cur_trb = ep_ring->dequeue,
 		     cur_seg = ep_ring->deq_seg; cur_trb != event_trb;
@@ -1755,7 +1763,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
 			TRB_LEN(event->transfer_len);
 
 		if (trb_comp_code != COMP_STOP_INVAL) {
-			td->urb->iso_frame_desc[idx].actual_length = len;
+			frame->actual_length = len;
 			td->urb->actual_length += len;
 		}
 	}
@@ -1766,6 +1774,35 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
 	return finish_td(xhci, td, event_trb, event, ep, status, false);
 }
 
+static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td,
+			struct xhci_transfer_event *event,
+			struct xhci_virt_ep *ep, int *status)
+{
+	struct xhci_ring *ep_ring;
+	struct urb_priv *urb_priv;
+	struct usb_iso_packet_descriptor *frame;
+	int idx;
+
+	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
+	urb_priv = td->urb->hcpriv;
+	idx = urb_priv->td_cnt;
+	frame = &td->urb->iso_frame_desc[idx];
+
+	/* The transfer is partly done */
+	*status = -EXDEV;
+	frame->status = -EXDEV;
+
+	/* calc actual length */
+	frame->actual_length = 0;
+
+	/* Update ring dequeue pointer */
+	while (ep_ring->dequeue != td->last_trb)
+		inc_deq(xhci, ep_ring, false);
+	inc_deq(xhci, ep_ring, false);
+
+	return finish_td(xhci, td, NULL, event, ep, status, true);
+}
+
 /*
  * Process bulk and interrupt tds, update urb status and actual_length.
  */
@@ -2024,36 +2061,42 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		}
 
 		td = list_entry(ep_ring->td_list.next, struct xhci_td, td_list);
+
 		/* Is this a TRB in the currently executing TD? */
 		event_seg = trb_in_td(ep_ring->deq_seg, ep_ring->dequeue,
 				td->last_trb, event_dma);
-		if (event_seg && ep->skip) {
+		if (!event_seg) {
+			if (!ep->skip ||
+			    !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+				/* HC is busted, give up! */
+				xhci_err(xhci,
+					"ERROR Transfer event TRB DMA ptr not "
+					"part of current TD\n");
+				return -ESHUTDOWN;
+			}
+
+			ret = skip_isoc_td(xhci, td, event, ep, &status);
+			goto cleanup;
+		}
+
+		if (ep->skip) {
 			xhci_dbg(xhci, "Found td. Clear skip flag.\n");
 			ep->skip = false;
 		}
-		if (!event_seg &&
-		   (!ep->skip || !usb_endpoint_xfer_isoc(&td->urb->ep->desc))) {
-			/* HC is busted, give up! */
-			xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not "
-					"part of current TD\n");
-			return -ESHUTDOWN;
-		}
 
-		if (event_seg) {
-			event_trb = &event_seg->trbs[(event_dma -
-					 event_seg->dma) / sizeof(*event_trb)];
-			/*
-			 * No-op TRB should not trigger interrupts.
-			 * If event_trb is a no-op TRB, it means the
-			 * corresponding TD has been cancelled. Just ignore
-			 * the TD.
-			 */
-			if ((event_trb->generic.field[3] & TRB_TYPE_BITMASK)
-					 == TRB_TYPE(TRB_TR_NOOP)) {
-				xhci_dbg(xhci, "event_trb is a no-op TRB. "
-						"Skip it\n");
-				goto cleanup;
-			}
+		event_trb = &event_seg->trbs[(event_dma - event_seg->dma) /
+						sizeof(*event_trb)];
+		/*
+		 * No-op TRB should not trigger interrupts.
+		 * If event_trb is a no-op TRB, it means the
+		 * corresponding TD has been cancelled. Just ignore
+		 * the TD.
+		 */
+		if ((event_trb->generic.field[3] & TRB_TYPE_BITMASK)
+				 == TRB_TYPE(TRB_TR_NOOP)) {
+			xhci_dbg(xhci,
+				 "event_trb is a no-op TRB. Skip it\n");
+			goto cleanup;
 		}
 
 		/* Now update the urb's actual_length and give back to
@@ -3126,6 +3169,12 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		}
 	}
 
+	if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs == 0) {
+		if (xhci->quirks & XHCI_AMD_PLL_FIX)
+			usb_amd_quirk_pll_disable();
+	}
+	xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs++;
+
 	giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
 			start_cycle, start_trb);
 	return 0;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 196e0181b2ed..81b976e45880 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -550,6 +550,9 @@ void xhci_stop(struct usb_hcd *hcd)
 	del_timer_sync(&xhci->event_ring_timer);
 #endif
 
+	if (xhci->quirks & XHCI_AMD_PLL_FIX)
+		usb_amd_dev_put();
+
 	xhci_dbg(xhci, "// Disabling event ring interrupts\n");
 	temp = xhci_readl(xhci, &xhci->op_regs->status);
 	xhci_writel(xhci, temp & ~STS_EINT, &xhci->op_regs->status);
@@ -771,7 +774,9 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 
 	/* If restore operation fails, re-initialize the HC during resume */
 	if ((temp & STS_SRE) || hibernated) {
-		usb_root_hub_lost_power(hcd->self.root_hub);
+		/* Let the USB core know _both_ roothubs lost power. */
+		usb_root_hub_lost_power(xhci->main_hcd->self.root_hub);
+		usb_root_hub_lost_power(xhci->shared_hcd->self.root_hub);
 
 		xhci_dbg(xhci, "Stop HCD\n");
 		xhci_halt(xhci);
@@ -2386,10 +2391,18 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev)
 	/* Everything but endpoint 0 is disabled, so free or cache the rings. */
 	last_freed_endpoint = 1;
 	for (i = 1; i < 31; ++i) {
-		if (!virt_dev->eps[i].ring)
-			continue;
-		xhci_free_or_cache_endpoint_ring(xhci, virt_dev, i);
-		last_freed_endpoint = i;
+		struct xhci_virt_ep *ep = &virt_dev->eps[i];
+
+		if (ep->ep_state & EP_HAS_STREAMS) {
+			xhci_free_stream_info(xhci, ep->stream_info);
+			ep->stream_info = NULL;
+			ep->ep_state &= ~EP_HAS_STREAMS;
+		}
+
+		if (ep->ring) {
+			xhci_free_or_cache_endpoint_ring(xhci, virt_dev, i);
+			last_freed_endpoint = i;
+		}
 	}
 	xhci_dbg(xhci, "Output context after successful reset device cmd:\n");
 	xhci_dbg_ctx(xhci, virt_dev->out_ctx, last_freed_endpoint);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 07e263063e37..ba1be6b7cc6d 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -30,6 +30,7 @@
 
 /* Code sharing between pci-quirks and xhci hcd */
 #include	"xhci-ext-caps.h"
+#include "pci-quirks.h"
 
 /* xHCI PCI Configuration Registers */
 #define XHCI_SBRN_OFFSET	(0x60)
@@ -232,7 +233,7 @@ struct xhci_op_regs {
  * notification type that matches a bit set in this bit field.
  */
 #define	DEV_NOTE_MASK		(0xffff)
-#define ENABLE_DEV_NOTE(x)	(1 << x)
+#define ENABLE_DEV_NOTE(x)	(1 << (x))
 /* Most of the device notification types should only be used for debug.
  * SW does need to pay attention to function wake notifications.
  */
@@ -348,6 +349,9 @@ struct xhci_op_regs {
 /* Initiate a warm port reset - complete when PORT_WRC is '1' */
 #define PORT_WR		(1 << 31)
 
+/* We mark duplicate entries with -1 */
+#define DUPLICATE_ENTRY ((u8)(-1))
+
 /* Port Power Management Status and Control - port_power_base bitmasks */
 /* Inactivity timer value for transitions into U1, in microseconds.
  * Timeout can be up to 127us.  0xFF means an infinite timeout.
@@ -601,11 +605,11 @@ struct xhci_ep_ctx {
 #define EP_STATE_STOPPED	3
 #define EP_STATE_ERROR		4
 /* Mult - Max number of burtst within an interval, in EP companion desc. */
-#define EP_MULT(p)		((p & 0x3) << 8)
+#define EP_MULT(p)		(((p) & 0x3) << 8)
 /* bits 10:14 are Max Primary Streams */
 /* bit 15 is Linear Stream Array */
 /* Interval - period between requests to an endpoint - 125u increments. */
-#define EP_INTERVAL(p)		((p & 0xff) << 16)
+#define EP_INTERVAL(p)		(((p) & 0xff) << 16)
 #define EP_INTERVAL_TO_UFRAMES(p)		(1 << (((p) >> 16) & 0xff))
 #define EP_MAXPSTREAMS_MASK	(0x1f << 10)
 #define EP_MAXPSTREAMS(p)	(((p) << 10) & EP_MAXPSTREAMS_MASK)
@@ -1276,6 +1280,7 @@ struct xhci_hcd {
 #define	XHCI_LINK_TRB_QUIRK	(1 << 0)
 #define XHCI_RESET_EP_QUIRK	(1 << 1)
 #define XHCI_NEC_HOST		(1 << 2)
+#define XHCI_AMD_PLL_FIX	(1 << 3)
 	/* There are two roothubs to keep track of bus suspend info for */
 	struct xhci_bus_state   bus_state[2];
 	/* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 4cbb7e4b368d..74073b363c30 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -14,7 +14,7 @@ config USB_MUSB_HDRC
 	select TWL4030_USB if MACH_OMAP_3430SDP
 	select TWL6030_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA
 	select USB_OTG_UTILS
-	tristate 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)'
+	bool 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)'
 	help
 	  Say Y here if your system has a dual role high speed USB
 	  controller based on the Mentor Graphics silicon IP.  Then
@@ -30,8 +30,8 @@ config USB_MUSB_HDRC
 
 	  If you do not know what this is, please say N.
 
-	  To compile this driver as a module, choose M here; the
-	  module will be called "musb-hdrc".
+#	  To compile this driver as a module, choose M here; the
+#	  module will be called "musb-hdrc".
 
 choice
 	prompt "Platform Glue Layer"
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 52312e8af213..8e2a1ff8a35a 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -21,6 +21,7 @@
 #include <asm/cacheflush.h>
 
 #include "musb_core.h"
+#include "musbhsdma.h"
 #include "blackfin.h"
 
 struct bfin_glue {
@@ -332,6 +333,27 @@ static int bfin_musb_set_mode(struct musb *musb, u8 musb_mode)
 	return -EIO;
 }
 
+static int bfin_musb_adjust_channel_params(struct dma_channel *channel,
+				u16 packet_sz, u8 *mode,
+				dma_addr_t *dma_addr, u32 *len)
+{
+	struct musb_dma_channel *musb_channel = channel->private_data;
+
+	/*
+	 * Anomaly 05000450 might cause data corruption when using DMA
+	 * MODE 1 transmits with short packet.  So to work around this,
+	 * we truncate all MODE 1 transfers down to a multiple of the
+	 * max packet size, and then do the last short packet transfer
+	 * (if there is any) using MODE 0.
+	 */
+	if (ANOMALY_05000450) {
+		if (musb_channel->transmit && *mode == 1)
+			*len = *len - (*len % packet_sz);
+	}
+
+	return 0;
+}
+
 static void bfin_musb_reg_init(struct musb *musb)
 {
 	if (ANOMALY_05000346) {
@@ -430,6 +452,8 @@ static const struct musb_platform_ops bfin_ops = {
 
 	.vbus_status	= bfin_musb_vbus_status,
 	.set_vbus	= bfin_musb_set_vbus,
+
+	.adjust_channel_params = bfin_musb_adjust_channel_params,
 };
 
 static u64 bfin_dmamask = DMA_BIT_MASK(32);
diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c
index de55a3c3259a..ab434fbd8c35 100644
--- a/drivers/usb/musb/cppi_dma.c
+++ b/drivers/usb/musb/cppi_dma.c
@@ -597,12 +597,12 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
 		length = min(n_bds * maxpacket, length);
 	}
 
-	DBG(4, "TX DMA%d, pktSz %d %s bds %d dma 0x%x len %u\n",
+	DBG(4, "TX DMA%d, pktSz %d %s bds %d dma 0x%llx len %u\n",
 			tx->index,
 			maxpacket,
 			rndis ? "rndis" : "transparent",
 			n_bds,
-			addr, length);
+			(unsigned long long)addr, length);
 
 	cppi_rndis_update(tx, 0, musb->ctrl_base, rndis);
 
@@ -820,7 +820,7 @@ cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket)
 	length = min(n_bds * maxpacket, length);
 
 	DBG(4, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) "
-			"dma 0x%x len %u %u/%u\n",
+			"dma 0x%llx len %u %u/%u\n",
 			rx->index, maxpacket,
 			onepacket
 				? (is_rndis ? "rndis" : "onepacket")
@@ -829,7 +829,8 @@ cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket)
 			musb_readl(tibase,
 				DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
 					& 0xffff,
-			addr, length, rx->channel.actual_len, rx->buf_len);
+			(unsigned long long)addr, length,
+			rx->channel.actual_len, rx->buf_len);
 
 	/* only queue one segment at a time, since the hardware prevents
 	 * correct queue shutdown after unexpected short packets
@@ -1039,9 +1040,9 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
 		if (!completed && (bd->hw_options & CPPI_OWN_SET))
 			break;
 
-		DBG(5, "C/RXBD %08x: nxt %08x buf %08x "
+		DBG(5, "C/RXBD %llx: nxt %08x buf %08x "
 			"off.len %08x opt.len %08x (%d)\n",
-			bd->dma, bd->hw_next, bd->hw_bufp,
+			(unsigned long long)bd->dma, bd->hw_next, bd->hw_bufp,
 			bd->hw_off_len, bd->hw_options,
 			rx->channel.actual_len);
 
@@ -1111,11 +1112,12 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
 		musb_ep_select(cppi->mregs, rx->index + 1);
 		csr = musb_readw(regs, MUSB_RXCSR);
 		if (csr & MUSB_RXCSR_DMAENAB) {
-			DBG(4, "list%d %p/%p, last %08x%s, csr %04x\n",
+			DBG(4, "list%d %p/%p, last %llx%s, csr %04x\n",
 				rx->index,
 				rx->head, rx->tail,
 				rx->last_processed
-					? rx->last_processed->dma
+					? (unsigned long long)
+						rx->last_processed->dma
 					: 0,
 				completed ? ", completed" : "",
 				csr);
@@ -1167,8 +1169,11 @@ irqreturn_t cppi_interrupt(int irq, void *dev_id)
 	tx = musb_readl(tibase, DAVINCI_TXCPPI_MASKED_REG);
 	rx = musb_readl(tibase, DAVINCI_RXCPPI_MASKED_REG);
 
-	if (!tx && !rx)
+	if (!tx && !rx) {
+		if (cppi->irq)
+			spin_unlock_irqrestore(&musb->lock, flags);
 		return IRQ_NONE;
+	}
 
 	DBG(4, "CPPI IRQ Tx%x Rx%x\n", tx, rx);
 
@@ -1199,7 +1204,7 @@ irqreturn_t cppi_interrupt(int irq, void *dev_id)
 		 */
 		if (NULL == bd) {
 			DBG(1, "null BD\n");
-			tx_ram->tx_complete = 0;
+			musb_writel(&tx_ram->tx_complete, 0, 0);
 			continue;
 		}
 
@@ -1452,7 +1457,7 @@ static int cppi_channel_abort(struct dma_channel *channel)
 		 *    compare mode by writing 1 to the tx_complete register.
 		 */
 		cppi_reset_tx(tx_ram, 1);
-		cppi_ch->head = 0;
+		cppi_ch->head = NULL;
 		musb_writel(&tx_ram->tx_complete, 0, 1);
 		cppi_dump_tx(5, cppi_ch, " (done teardown)");
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 630ae7f3cd4c..f10ff00ca09e 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1030,6 +1030,7 @@ static void musb_shutdown(struct platform_device *pdev)
 	struct musb	*musb = dev_to_musb(&pdev->dev);
 	unsigned long	flags;
 
+	pm_runtime_get_sync(musb->controller);
 	spin_lock_irqsave(&musb->lock, flags);
 	musb_platform_disable(musb);
 	musb_generic_disable(musb);
@@ -1040,6 +1041,7 @@ static void musb_shutdown(struct platform_device *pdev)
 	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 	musb_platform_exit(musb);
 
+	pm_runtime_put(musb->controller);
 	/* FIXME power down */
 }
 
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 4bd9e2145ee4..0e053b587960 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -261,6 +261,7 @@ enum musb_g_ep0_state {
  * @try_ilde:	tries to idle the IP
  * @vbus_status: returns vbus status if possible
  * @set_vbus:	forces vbus status
+ * @adjust_channel_params: pre check for standard dma channel_program func
  */
 struct musb_platform_ops {
 	int	(*init)(struct musb *musb);
@@ -274,6 +275,10 @@ struct musb_platform_ops {
 
 	int	(*vbus_status)(struct musb *musb);
 	void	(*set_vbus)(struct musb *musb, int on);
+
+	int	(*adjust_channel_params)(struct dma_channel *channel,
+				u16 packet_sz, u8 *mode,
+				dma_addr_t *dma_addr, u32 *len);
 };
 
 /*
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 98519c5d8b5c..f47c20197c61 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -535,7 +535,7 @@ void musb_g_tx(struct musb *musb, u8 epnum)
 			is_dma = 1;
 			csr |= MUSB_TXCSR_P_WZC_BITS;
 			csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN |
-				 MUSB_TXCSR_TXPKTRDY);
+				 MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET);
 			musb_writew(epio, MUSB_TXCSR, csr);
 			/* Ensure writebuffer is empty. */
 			csr = musb_readw(epio, MUSB_TXCSR);
@@ -1296,7 +1296,7 @@ static int musb_gadget_dequeue(struct usb_ep *ep, struct usb_request *request)
 	}
 
 	/* if the hardware doesn't have the request, easy ... */
-	if (musb_ep->req_list.next != &request->list || musb_ep->busy)
+	if (musb_ep->req_list.next != &req->list || musb_ep->busy)
 		musb_g_giveback(musb_ep, request, -ECONNRESET);
 
 	/* ... else abort the dma transfer ... */
@@ -1887,11 +1887,9 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver,
 			otg_set_vbus(musb->xceiv, 1);
 
 		hcd->self.uses_pio_for_control = 1;
-
-		if (musb->xceiv->last_event == USB_EVENT_NONE)
-			pm_runtime_put(musb->controller);
-
 	}
+	if (musb->xceiv->last_event == USB_EVENT_NONE)
+		pm_runtime_put(musb->controller);
 
 	return 0;
 
diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c
index 0144a2d481fd..d281792db05c 100644
--- a/drivers/usb/musb/musbhsdma.c
+++ b/drivers/usb/musb/musbhsdma.c
@@ -169,6 +169,14 @@ static int dma_channel_program(struct dma_channel *channel,
 	BUG_ON(channel->status == MUSB_DMA_STATUS_UNKNOWN ||
 		channel->status == MUSB_DMA_STATUS_BUSY);
 
+	/* Let targets check/tweak the arguments */
+	if (musb->ops->adjust_channel_params) {
+		int ret = musb->ops->adjust_channel_params(channel,
+			packet_sz, &mode, &dma_addr, &len);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * The DMA engine in RTL1.8 and above cannot handle
 	 * DMA addresses that are not aligned to a 4 byte boundary.
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 25cb8b0003b1..e9e60b6e0583 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -259,9 +259,10 @@ static int musb_otg_notifications(struct notifier_block *nb,
 	case USB_EVENT_VBUS:
 		DBG(4, "VBUS Connect\n");
 
+#ifdef CONFIG_USB_GADGET_MUSB_HDRC
 		if (musb->gadget_driver)
 			pm_runtime_get_sync(musb->controller);
-
+#endif
 		otg_init(musb->xceiv);
 		break;
 
@@ -269,7 +270,7 @@ static int musb_otg_notifications(struct notifier_block *nb,
 		DBG(4, "VBUS Disconnect\n");
 
 #ifdef CONFIG_USB_GADGET_MUSB_HDRC
-		if (is_otg_enabled(musb))
+		if (is_otg_enabled(musb) || is_peripheral_enabled(musb))
 			if (musb->gadget_driver)
 #endif
 			{
diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c
index d6384e4aeef9..f7e04bf34a13 100644
--- a/drivers/usb/musb/ux500.c
+++ b/drivers/usb/musb/ux500.c
@@ -93,6 +93,8 @@ static int __init ux500_probe(struct platform_device *pdev)
 	}
 
 	musb->dev.parent		= &pdev->dev;
+	musb->dev.dma_mask		= pdev->dev.dma_mask;
+	musb->dev.coherent_dma_mask	= pdev->dev.coherent_dma_mask;
 
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index a973c7a29d6e..4de6ef0ae52a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -151,6 +151,8 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = {
  * /sys/bus/usb/ftdi_sio/new_id, then send patch/report!
  */
 static struct usb_device_id id_table_combined [] = {
+	{ USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) },
@@ -525,6 +527,7 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_8_PID) },
 	{ USB_DEVICE(IDTECH_VID, IDTECH_IDT1221U_PID) },
 	{ USB_DEVICE(OCT_VID, OCT_US101_PID) },
+	{ USB_DEVICE(OCT_VID, OCT_DK201_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_HE_TIRA1_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_HE_TIRA1_quirk },
 	{ USB_DEVICE(FTDI_VID, FTDI_USB_UIRT_PID),
@@ -787,6 +790,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) },
+	{ USB_DEVICE(FTDI_VID, HAMEG_HO720_PID) },
+	{ USB_DEVICE(FTDI_VID, HAMEG_HO730_PID) },
 	{ USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) },
 	{ USB_DEVICE(FTDI_VID, MJSG_GENERIC_PID) },
 	{ USB_DEVICE(FTDI_VID, MJSG_SR_RADIO_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index c543e55bafba..efffc23723bd 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -300,6 +300,8 @@
  * Hameg HO820 and HO870 interface (using VID 0x0403)
  */
 #define HAMEG_HO820_PID			0xed74
+#define HAMEG_HO730_PID			0xed73
+#define HAMEG_HO720_PID			0xed72
 #define HAMEG_HO870_PID			0xed71
 
 /*
@@ -572,6 +574,7 @@
 /* Note: OCT US101 is also rebadged as Dick Smith Electronics (NZ) XH6381 */
 /* Also rebadged as Dick Smith Electronics (Aus) XH6451 */
 /* Also rebadged as SIIG Inc. model US2308 hardware version 1 */
+#define OCT_DK201_PID		0x0103	/* OCT DK201 USB docking station */
 #define OCT_US101_PID		0x0421	/* OCT US101 USB to RS-232 */
 
 /*
@@ -1141,3 +1144,12 @@
 #define QIHARDWARE_VID			0x20B7
 #define MILKYMISTONE_JTAGSERIAL_PID	0x0713
 
+/*
+ * CTI GmbH RS485 Converter http://www.cti-lean.com/
+ */
+/* USB-485-Mini*/
+#define FTDI_CTI_MINI_PID	0xF608
+/* USB-Nano-485*/
+#define FTDI_CTI_NANO_PID	0xF60B
+
+
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 75c7f456eed5..d77ff0435896 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -407,6 +407,10 @@ static void option_instat_callback(struct urb *urb);
 /* ONDA MT825UP HSDPA 14.2 modem */
 #define ONDA_MT825UP         0x000b
 
+/* Samsung products */
+#define SAMSUNG_VENDOR_ID                       0x04e8
+#define SAMSUNG_PRODUCT_GT_B3730                0x6889
+
 /* some devices interfaces need special handling due to a number of reasons */
 enum option_blacklist_reason {
 		OPTION_BLACKLIST_NONE = 0,
@@ -968,6 +972,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) },
 	{ USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
 	{ USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
+	{ USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730/GT-B3710 LTE USB modem.*/
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 8858201eb1d3..54a9dab1f33b 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -111,7 +111,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 	ifnum = intf->desc.bInterfaceNumber;
 	dbg("This Interface = %d", ifnum);
 
-	data = serial->private = kzalloc(sizeof(struct usb_wwan_intf_private),
+	data = kzalloc(sizeof(struct usb_wwan_intf_private),
 					 GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
@@ -134,8 +134,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 		    usb_endpoint_is_bulk_out(&intf->endpoint[1].desc)) {
 			dbg("QDL port found");
 
-			if (serial->interface->num_altsetting == 1)
-				return 0;
+			if (serial->interface->num_altsetting == 1) {
+				retval = 0; /* Success */
+				break;
+			}
 
 			retval = usb_set_interface(serial->dev, ifnum, 1);
 			if (retval < 0) {
@@ -145,7 +147,6 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 				retval = -ENODEV;
 				kfree(data);
 			}
-			return retval;
 		}
 		break;
 
@@ -166,6 +167,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 					"Could not set interface, error %d\n",
 					retval);
 				retval = -ENODEV;
+				kfree(data);
 			}
 		} else if (ifnum == 2) {
 			dbg("Modem port found");
@@ -177,7 +179,6 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 				retval = -ENODEV;
 				kfree(data);
 			}
-			return retval;
 		} else if (ifnum==3) {
 			/*
 			 * NMEA (serial line 9600 8N1)
@@ -191,6 +192,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 					"Could not set interface, error %d\n",
 					retval);
 				retval = -ENODEV;
+				kfree(data);
 			}
 		}
 		break;
@@ -199,12 +201,27 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 		dev_err(&serial->dev->dev,
 			"unknown number of interfaces: %d\n", nintf);
 		kfree(data);
-		return -ENODEV;
+		retval = -ENODEV;
 	}
 
+	/* Set serial->private if not returning -ENODEV */
+	if (retval != -ENODEV)
+		usb_set_serial_data(serial, data);
 	return retval;
 }
 
+static void qc_release(struct usb_serial *serial)
+{
+	struct usb_wwan_intf_private *priv = usb_get_serial_data(serial);
+
+	dbg("%s", __func__);
+
+	/* Call usb_wwan release & free the private data allocated in qcprobe */
+	usb_wwan_release(serial);
+	usb_set_serial_data(serial, NULL);
+	kfree(priv);
+}
+
 static struct usb_serial_driver qcdevice = {
 	.driver = {
 		.owner     = THIS_MODULE,
@@ -222,7 +239,7 @@ static struct usb_serial_driver qcdevice = {
 	.chars_in_buffer     = usb_wwan_chars_in_buffer,
 	.attach		     = usb_wwan_startup,
 	.disconnect	     = usb_wwan_disconnect,
-	.release	     = usb_wwan_release,
+	.release	     = qc_release,
 #ifdef CONFIG_PM
 	.suspend	     = usb_wwan_suspend,
 	.resume		     = usb_wwan_resume,
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 09e52ba47ddf..ffc4193e9505 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -499,7 +499,6 @@ static int isd200_action( struct us_data *us, int action,
 	memset(&ata, 0, sizeof(ata));
 	srb->cmnd = info->cmnd;
 	srb->device = &srb_dev;
-	++srb->serial_number;
 
 	ata.generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
 	ata.generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ab291241635..7aa4eea930f1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -4,7 +4,7 @@
  * Author: Michael S. Tsirkin <mst@redhat.com>
  *
  * Inspiration, some code, and most witty comments come from
- * Documentation/lguest/lguest.c, by Rusty Russell
+ * Documentation/virtual/lguest/lguest.c, by Rusty Russell
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  *
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index 82acb8dc4aa1..6183a57eb69d 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -66,7 +66,7 @@
  * have.  Allow 1% either way on the nominal for TVs.
  */
 #define NR_MONTYPES	6
-static struct fb_monspecs monspecs[NR_MONTYPES] __initdata = {
+static struct fb_monspecs monspecs[NR_MONTYPES] __devinitdata = {
 	{	/* TV		*/
 		.hfmin	= 15469,
 		.hfmax	= 15781,
@@ -873,7 +873,7 @@ static struct fb_ops acornfb_ops = {
 /*
  * Everything after here is initialisation!!!
  */
-static struct fb_videomode modedb[] __initdata = {
+static struct fb_videomode modedb[] __devinitdata = {
 	{	/* 320x256 @ 50Hz */
 		NULL, 50,  320,  256, 125000,  92,  62,  35, 19,  38, 2,
 		FB_SYNC_COMP_HIGH_ACT,
@@ -925,8 +925,7 @@ static struct fb_videomode modedb[] __initdata = {
 	}
 };
 
-static struct fb_videomode __initdata
-acornfb_default_mode = {
+static struct fb_videomode acornfb_default_mode __devinitdata = {
 	.name =		NULL,
 	.refresh =	60,
 	.xres =		640,
@@ -942,7 +941,7 @@ acornfb_default_mode = {
 	.vmode =	FB_VMODE_NONINTERLACED
 };
 
-static void __init acornfb_init_fbinfo(void)
+static void __devinit acornfb_init_fbinfo(void)
 {
 	static int first = 1;
 
@@ -1018,8 +1017,7 @@ static void __init acornfb_init_fbinfo(void)
  *	size can optionally be followed by 'M' or 'K' for
  *	MB or KB respectively.
  */
-static void __init
-acornfb_parse_mon(char *opt)
+static void __devinit acornfb_parse_mon(char *opt)
 {
 	char *p = opt;
 
@@ -1066,8 +1064,7 @@ bad:
 	current_par.montype = -1;
 }
 
-static void __init
-acornfb_parse_montype(char *opt)
+static void __devinit acornfb_parse_montype(char *opt)
 {
 	current_par.montype = -2;
 
@@ -1108,8 +1105,7 @@ acornfb_parse_montype(char *opt)
 	}
 }
 
-static void __init
-acornfb_parse_dram(char *opt)
+static void __devinit acornfb_parse_dram(char *opt)
 {
 	unsigned int size;
 
@@ -1134,15 +1130,14 @@ acornfb_parse_dram(char *opt)
 static struct options {
 	char *name;
 	void (*parse)(char *opt);
-} opt_table[] __initdata = {
+} opt_table[] __devinitdata = {
 	{ "mon",     acornfb_parse_mon     },
 	{ "montype", acornfb_parse_montype },
 	{ "dram",    acornfb_parse_dram    },
 	{ NULL, NULL }
 };
 
-int __init
-acornfb_setup(char *options)
+static int __devinit acornfb_setup(char *options)
 {
 	struct options *optp;
 	char *opt;
@@ -1179,8 +1174,7 @@ acornfb_setup(char *options)
  * Detect type of monitor connected
  *  For now, we just assume SVGA
  */
-static int __init
-acornfb_detect_monitortype(void)
+static int __devinit acornfb_detect_monitortype(void)
 {
 	return 4;
 }
diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index 5b2b5ef4edba..64e41f5448c4 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -3117,7 +3117,7 @@ int __init atafb_init(void)
 			atafb_ops.fb_setcolreg = &falcon_setcolreg;
 			error = request_irq(IRQ_AUTO_4, falcon_vbl_switcher,
 					    IRQ_TYPE_PRIO,
-					    "framebuffer/modeswitch",
+					    "framebuffer:modeswitch",
 					    falcon_vbl_switcher);
 			if (error)
 				return error;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index e0c2284924b6..5aac00eb1830 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -42,9 +42,34 @@
 
 #define FBPIXMAPSIZE	(1024 * 8)
 
+static DEFINE_MUTEX(registration_lock);
 struct fb_info *registered_fb[FB_MAX] __read_mostly;
 int num_registered_fb __read_mostly;
 
+static struct fb_info *get_fb_info(unsigned int idx)
+{
+	struct fb_info *fb_info;
+
+	if (idx >= FB_MAX)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&registration_lock);
+	fb_info = registered_fb[idx];
+	if (fb_info)
+		atomic_inc(&fb_info->count);
+	mutex_unlock(&registration_lock);
+
+	return fb_info;
+}
+
+static void put_fb_info(struct fb_info *fb_info)
+{
+	if (!atomic_dec_and_test(&fb_info->count))
+		return;
+	if (fb_info->fbops->fb_destroy)
+		fb_info->fbops->fb_destroy(fb_info);
+}
+
 int lock_fb_info(struct fb_info *info)
 {
 	mutex_lock(&info->lock);
@@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; }
 
 static void *fb_seq_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&registration_lock);
 	return (*pos < FB_MAX) ? pos : NULL;
 }
 
@@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void fb_seq_stop(struct seq_file *m, void *v)
 {
+	mutex_unlock(&registration_lock);
 }
 
 static int fb_seq_show(struct seq_file *m, void *v)
@@ -690,13 +717,30 @@ static const struct file_operations fb_proc_fops = {
 	.release	= seq_release,
 };
 
-static ssize_t
-fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+/*
+ * We hold a reference to the fb_info in file->private_data,
+ * but if the current registered fb has changed, we don't
+ * actually want to use it.
+ *
+ * So look up the fb_info using the inode minor number,
+ * and just verify it against the reference we have.
+ */
+static struct fb_info *file_fb_info(struct file *file)
 {
-	unsigned long p = *ppos;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	int fbidx = iminor(inode);
 	struct fb_info *info = registered_fb[fbidx];
+
+	if (info != file->private_data)
+		info = NULL;
+	return info;
+}
+
+static ssize_t
+fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	struct fb_info *info = file_fb_info(file);
 	u8 *buffer, *dst;
 	u8 __iomem *src;
 	int c, cnt = 0, err = 0;
@@ -761,9 +805,7 @@ static ssize_t
 fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int fbidx = iminor(inode);
-	struct fb_info *info = registered_fb[fbidx];
+	struct fb_info *info = file_fb_info(file);
 	u8 *buffer, *src;
 	u8 __iomem *dst;
 	int c, cnt = 0, err = 0;
@@ -1141,10 +1183,10 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
 
 static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int fbidx = iminor(inode);
-	struct fb_info *info = registered_fb[fbidx];
+	struct fb_info *info = file_fb_info(file);
 
+	if (!info)
+		return -ENODEV;
 	return do_fb_ioctl(info, cmd, arg);
 }
 
@@ -1265,12 +1307,13 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd,
 static long fb_compat_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int fbidx = iminor(inode);
-	struct fb_info *info = registered_fb[fbidx];
-	struct fb_ops *fb = info->fbops;
+	struct fb_info *info = file_fb_info(file);
+	struct fb_ops *fb;
 	long ret = -ENOIOCTLCMD;
 
+	if (!info)
+		return -ENODEV;
+	fb = info->fbops;
 	switch(cmd) {
 	case FBIOGET_VSCREENINFO:
 	case FBIOPUT_VSCREENINFO:
@@ -1303,16 +1346,18 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd,
 static int
 fb_mmap(struct file *file, struct vm_area_struct * vma)
 {
-	int fbidx = iminor(file->f_path.dentry->d_inode);
-	struct fb_info *info = registered_fb[fbidx];
-	struct fb_ops *fb = info->fbops;
+	struct fb_info *info = file_fb_info(file);
+	struct fb_ops *fb;
 	unsigned long off;
 	unsigned long start;
 	u32 len;
 
+	if (!info)
+		return -ENODEV;
 	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
 		return -EINVAL;
 	off = vma->vm_pgoff << PAGE_SHIFT;
+	fb = info->fbops;
 	if (!fb)
 		return -ENODEV;
 	mutex_lock(&info->mm_lock);
@@ -1361,14 +1406,16 @@ __releases(&info->lock)
 	struct fb_info *info;
 	int res = 0;
 
-	if (fbidx >= FB_MAX)
-		return -ENODEV;
-	info = registered_fb[fbidx];
-	if (!info)
+	info = get_fb_info(fbidx);
+	if (!info) {
 		request_module("fb%d", fbidx);
-	info = registered_fb[fbidx];
-	if (!info)
-		return -ENODEV;
+		info = get_fb_info(fbidx);
+		if (!info)
+			return -ENODEV;
+	}
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
 	mutex_lock(&info->lock);
 	if (!try_module_get(info->fbops->owner)) {
 		res = -ENODEV;
@@ -1386,6 +1433,8 @@ __releases(&info->lock)
 #endif
 out:
 	mutex_unlock(&info->lock);
+	if (res)
+		put_fb_info(info);
 	return res;
 }
 
@@ -1401,6 +1450,7 @@ __releases(&info->lock)
 		info->fbops->fb_release(info,1);
 	module_put(info->fbops->owner);
 	mutex_unlock(&info->lock);
+	put_fb_info(info);
 	return 0;
 }
 
@@ -1487,8 +1537,10 @@ static bool fb_do_apertures_overlap(struct apertures_struct *gena,
 	return false;
 }
 
+static int do_unregister_framebuffer(struct fb_info *fb_info);
+
 #define VGA_FB_PHYS 0xA0000
-void remove_conflicting_framebuffers(struct apertures_struct *a,
+static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
 				     const char *name, bool primary)
 {
 	int i;
@@ -1510,43 +1562,32 @@ void remove_conflicting_framebuffers(struct apertures_struct *a,
 			printk(KERN_INFO "fb: conflicting fb hw usage "
 			       "%s vs %s - removing generic driver\n",
 			       name, registered_fb[i]->fix.id);
-			unregister_framebuffer(registered_fb[i]);
+			do_unregister_framebuffer(registered_fb[i]);
 		}
 	}
 }
-EXPORT_SYMBOL(remove_conflicting_framebuffers);
 
-/**
- *	register_framebuffer - registers a frame buffer device
- *	@fb_info: frame buffer info structure
- *
- *	Registers a frame buffer device @fb_info.
- *
- *	Returns negative errno on error, or zero for success.
- *
- */
-
-int
-register_framebuffer(struct fb_info *fb_info)
+static int do_register_framebuffer(struct fb_info *fb_info)
 {
 	int i;
 	struct fb_event event;
 	struct fb_videomode mode;
 
-	if (num_registered_fb == FB_MAX)
-		return -ENXIO;
-
 	if (fb_check_foreignness(fb_info))
 		return -ENOSYS;
 
-	remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id,
+	do_remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id,
 					 fb_is_primary_device(fb_info));
 
+	if (num_registered_fb == FB_MAX)
+		return -ENXIO;
+
 	num_registered_fb++;
 	for (i = 0 ; i < FB_MAX; i++)
 		if (!registered_fb[i])
 			break;
 	fb_info->node = i;
+	atomic_set(&fb_info->count, 1);
 	mutex_init(&fb_info->lock);
 	mutex_init(&fb_info->mm_lock);
 
@@ -1592,36 +1633,14 @@ register_framebuffer(struct fb_info *fb_info)
 	return 0;
 }
 
-
-/**
- *	unregister_framebuffer - releases a frame buffer device
- *	@fb_info: frame buffer info structure
- *
- *	Unregisters a frame buffer device @fb_info.
- *
- *	Returns negative errno on error, or zero for success.
- *
- *      This function will also notify the framebuffer console
- *      to release the driver.
- *
- *      This is meant to be called within a driver's module_exit()
- *      function. If this is called outside module_exit(), ensure
- *      that the driver implements fb_open() and fb_release() to
- *      check that no processes are using the device.
- */
-
-int
-unregister_framebuffer(struct fb_info *fb_info)
+static int do_unregister_framebuffer(struct fb_info *fb_info)
 {
 	struct fb_event event;
 	int i, ret = 0;
 
 	i = fb_info->node;
-	if (!registered_fb[i]) {
-		ret = -EINVAL;
-		goto done;
-	}
-
+	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
+		return -EINVAL;
 
 	if (!lock_fb_info(fb_info))
 		return -ENODEV;
@@ -1629,16 +1648,14 @@ unregister_framebuffer(struct fb_info *fb_info)
 	ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event);
 	unlock_fb_info(fb_info);
 
-	if (ret) {
-		ret = -EINVAL;
-		goto done;
-	}
+	if (ret)
+		return -EINVAL;
 
 	if (fb_info->pixmap.addr &&
 	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
 		kfree(fb_info->pixmap.addr);
 	fb_destroy_modelist(&fb_info->modelist);
-	registered_fb[i]=NULL;
+	registered_fb[i] = NULL;
 	num_registered_fb--;
 	fb_cleanup_device(fb_info);
 	device_destroy(fb_class, MKDEV(FB_MAJOR, i));
@@ -1646,9 +1663,65 @@ unregister_framebuffer(struct fb_info *fb_info)
 	fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event);
 
 	/* this may free fb info */
-	if (fb_info->fbops->fb_destroy)
-		fb_info->fbops->fb_destroy(fb_info);
-done:
+	put_fb_info(fb_info);
+	return 0;
+}
+
+void remove_conflicting_framebuffers(struct apertures_struct *a,
+				     const char *name, bool primary)
+{
+	mutex_lock(&registration_lock);
+	do_remove_conflicting_framebuffers(a, name, primary);
+	mutex_unlock(&registration_lock);
+}
+EXPORT_SYMBOL(remove_conflicting_framebuffers);
+
+/**
+ *	register_framebuffer - registers a frame buffer device
+ *	@fb_info: frame buffer info structure
+ *
+ *	Registers a frame buffer device @fb_info.
+ *
+ *	Returns negative errno on error, or zero for success.
+ *
+ */
+int
+register_framebuffer(struct fb_info *fb_info)
+{
+	int ret;
+
+	mutex_lock(&registration_lock);
+	ret = do_register_framebuffer(fb_info);
+	mutex_unlock(&registration_lock);
+
+	return ret;
+}
+
+/**
+ *	unregister_framebuffer - releases a frame buffer device
+ *	@fb_info: frame buffer info structure
+ *
+ *	Unregisters a frame buffer device @fb_info.
+ *
+ *	Returns negative errno on error, or zero for success.
+ *
+ *      This function will also notify the framebuffer console
+ *      to release the driver.
+ *
+ *      This is meant to be called within a driver's module_exit()
+ *      function. If this is called outside module_exit(), ensure
+ *      that the driver implements fb_open() and fb_release() to
+ *      check that no processes are using the device.
+ */
+int
+unregister_framebuffer(struct fb_info *fb_info)
+{
+	int ret;
+
+	mutex_lock(&registration_lock);
+	ret = do_unregister_framebuffer(fb_info);
+	mutex_unlock(&registration_lock);
+
 	return ret;
 }
 
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index a2e5b5100ab4..0f4e8c942f9e 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -1648,7 +1648,9 @@ pxafb_freq_transition(struct notifier_block *nb, unsigned long val, void *data)
 
 	switch (val) {
 	case CPUFREQ_PRECHANGE:
-		if (!fbi->overlay[0].usage && !fbi->overlay[1].usage)
+#ifdef CONFIG_FB_PXA_OVERLAY
+		if (!(fbi->overlay[0].usage || fbi->overlay[1].usage))
+#endif
 			set_ctrlr_state(fbi, C_DISABLE_CLKCHANGE);
 		break;
 
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 68041d9dc260..695066b5b2e6 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -27,6 +27,7 @@
 #include <linux/fb.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 #include <linux/delay.h>
 #include <video/udlfb.h>
 #include "edid.h"
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 4fb5b2bf2348..4bcc8b82640b 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -590,15 +590,10 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 
 static void virtio_pci_release_dev(struct device *_d)
 {
-	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	struct virtio_device *dev = container_of(_d, struct virtio_device,
+						 dev);
 	struct virtio_pci_device *vp_dev = to_vp_device(dev);
-	struct pci_dev *pci_dev = vp_dev->pci_dev;
 
-	vp_del_vqs(dev);
-	pci_set_drvdata(pci_dev, NULL);
-	pci_iounmap(pci_dev, vp_dev->ioaddr);
-	pci_release_regions(pci_dev);
-	pci_disable_device(pci_dev);
 	kfree(vp_dev);
 }
 
@@ -681,6 +676,12 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
 	unregister_virtio_device(&vp_dev->vdev);
+
+	vp_del_vqs(&vp_dev->vdev);
+	pci_set_drvdata(pci_dev, NULL);
+	pci_iounmap(pci_dev, vp_dev->ioaddr);
+	pci_release_regions(pci_dev);
+	pci_disable_device(pci_dev);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cc2f73e03475..b0043fb26a4d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -371,6 +371,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->data[i];
 		detach_buf(vq, i);
+		vq->vring.avail->idx--;
 		END_USE(vq);
 		return buf;
 	}
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1b0f98bc51b5..022f9eb0b7bf 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -990,6 +990,12 @@ config BCM63XX_WDT
 	  To compile this driver as a loadable module, choose M here.
 	  The module will be called bcm63xx_wdt.
 
+config LANTIQ_WDT
+	tristate "Lantiq SoC watchdog"
+	depends on LANTIQ
+	help
+	  Hardware driver for the Lantiq SoC Watchdog Timer.
+
 # PARISC Architecture
 
 # POWERPC Architecture
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 3f8608b922a7..ed26f7094e47 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_AR7_WDT) += ar7_wdt.o
 obj-$(CONFIG_TXX9_WDT) += txx9wdt.o
 obj-$(CONFIG_OCTEON_WDT) += octeon-wdt.o
 octeon-wdt-y := octeon-wdt-main.o octeon-wdt-nmi.o
+obj-$(CONFIG_LANTIQ_WDT) += lantiq_wdt.o
 
 # PARISC Architecture
 
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 35a0d12dad73..5fd020da7c55 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -35,6 +35,7 @@
  *	document number 324645-001, 324646-001: Cougar Point (CPT)
  *	document number TBD                   : Patsburg (PBG)
  *	document number TBD                   : DH89xxCC
+ *	document number TBD                   : Panther Point
  */
 
 /*
@@ -153,6 +154,38 @@ enum iTCO_chipsets {
 	TCO_PBG1,	/* Patsburg */
 	TCO_PBG2,	/* Patsburg */
 	TCO_DH89XXCC,	/* DH89xxCC */
+	TCO_PPT0,	/* Panther Point */
+	TCO_PPT1,	/* Panther Point */
+	TCO_PPT2,	/* Panther Point */
+	TCO_PPT3,	/* Panther Point */
+	TCO_PPT4,	/* Panther Point */
+	TCO_PPT5,	/* Panther Point */
+	TCO_PPT6,	/* Panther Point */
+	TCO_PPT7,	/* Panther Point */
+	TCO_PPT8,	/* Panther Point */
+	TCO_PPT9,	/* Panther Point */
+	TCO_PPT10,	/* Panther Point */
+	TCO_PPT11,	/* Panther Point */
+	TCO_PPT12,	/* Panther Point */
+	TCO_PPT13,	/* Panther Point */
+	TCO_PPT14,	/* Panther Point */
+	TCO_PPT15,	/* Panther Point */
+	TCO_PPT16,	/* Panther Point */
+	TCO_PPT17,	/* Panther Point */
+	TCO_PPT18,	/* Panther Point */
+	TCO_PPT19,	/* Panther Point */
+	TCO_PPT20,	/* Panther Point */
+	TCO_PPT21,	/* Panther Point */
+	TCO_PPT22,	/* Panther Point */
+	TCO_PPT23,	/* Panther Point */
+	TCO_PPT24,	/* Panther Point */
+	TCO_PPT25,	/* Panther Point */
+	TCO_PPT26,	/* Panther Point */
+	TCO_PPT27,	/* Panther Point */
+	TCO_PPT28,	/* Panther Point */
+	TCO_PPT29,	/* Panther Point */
+	TCO_PPT30,	/* Panther Point */
+	TCO_PPT31,	/* Panther Point */
 };
 
 static struct {
@@ -244,6 +277,38 @@ static struct {
 	{"Patsburg", 2},
 	{"Patsburg", 2},
 	{"DH89xxCC", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
+	{"Panther Point", 2},
 	{NULL, 0}
 };
 
@@ -363,6 +428,38 @@ static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
 	{ ITCO_PCI_DEVICE(0x1d40,				TCO_PBG1)},
 	{ ITCO_PCI_DEVICE(0x1d41,				TCO_PBG2)},
 	{ ITCO_PCI_DEVICE(0x2310,				TCO_DH89XXCC)},
+	{ ITCO_PCI_DEVICE(0x1e40,				TCO_PPT0)},
+	{ ITCO_PCI_DEVICE(0x1e41,				TCO_PPT1)},
+	{ ITCO_PCI_DEVICE(0x1e42,				TCO_PPT2)},
+	{ ITCO_PCI_DEVICE(0x1e43,				TCO_PPT3)},
+	{ ITCO_PCI_DEVICE(0x1e44,				TCO_PPT4)},
+	{ ITCO_PCI_DEVICE(0x1e45,				TCO_PPT5)},
+	{ ITCO_PCI_DEVICE(0x1e46,				TCO_PPT6)},
+	{ ITCO_PCI_DEVICE(0x1e47,				TCO_PPT7)},
+	{ ITCO_PCI_DEVICE(0x1e48,				TCO_PPT8)},
+	{ ITCO_PCI_DEVICE(0x1e49,				TCO_PPT9)},
+	{ ITCO_PCI_DEVICE(0x1e4a,				TCO_PPT10)},
+	{ ITCO_PCI_DEVICE(0x1e4b,				TCO_PPT11)},
+	{ ITCO_PCI_DEVICE(0x1e4c,				TCO_PPT12)},
+	{ ITCO_PCI_DEVICE(0x1e4d,				TCO_PPT13)},
+	{ ITCO_PCI_DEVICE(0x1e4e,				TCO_PPT14)},
+	{ ITCO_PCI_DEVICE(0x1e4f,				TCO_PPT15)},
+	{ ITCO_PCI_DEVICE(0x1e50,				TCO_PPT16)},
+	{ ITCO_PCI_DEVICE(0x1e51,				TCO_PPT17)},
+	{ ITCO_PCI_DEVICE(0x1e52,				TCO_PPT18)},
+	{ ITCO_PCI_DEVICE(0x1e53,				TCO_PPT19)},
+	{ ITCO_PCI_DEVICE(0x1e54,				TCO_PPT20)},
+	{ ITCO_PCI_DEVICE(0x1e55,				TCO_PPT21)},
+	{ ITCO_PCI_DEVICE(0x1e56,				TCO_PPT22)},
+	{ ITCO_PCI_DEVICE(0x1e57,				TCO_PPT23)},
+	{ ITCO_PCI_DEVICE(0x1e58,				TCO_PPT24)},
+	{ ITCO_PCI_DEVICE(0x1e59,				TCO_PPT25)},
+	{ ITCO_PCI_DEVICE(0x1e5a,				TCO_PPT26)},
+	{ ITCO_PCI_DEVICE(0x1e5b,				TCO_PPT27)},
+	{ ITCO_PCI_DEVICE(0x1e5c,				TCO_PPT28)},
+	{ ITCO_PCI_DEVICE(0x1e5d,				TCO_PPT29)},
+	{ ITCO_PCI_DEVICE(0x1e5e,				TCO_PPT30)},
+	{ ITCO_PCI_DEVICE(0x1e5f,				TCO_PPT31)},
 	{ 0, },			/* End of list */
 };
 MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
diff --git a/drivers/watchdog/lantiq_wdt.c b/drivers/watchdog/lantiq_wdt.c
new file mode 100644
index 000000000000..7d82adac1cb2
--- /dev/null
+++ b/drivers/watchdog/lantiq_wdt.c
@@ -0,0 +1,261 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ *  Based on EP93xx wdt driver
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/watchdog.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+
+#include <lantiq.h>
+
+/* Section 3.4 of the datasheet
+ * The password sequence protects the WDT control register from unintended
+ * write actions, which might cause malfunction of the WDT.
+ *
+ * essentially the following two magic passwords need to be written to allow
+ * IO access to the WDT core
+ */
+#define LTQ_WDT_PW1		0x00BE0000
+#define LTQ_WDT_PW2		0x00DC0000
+
+#define LTQ_WDT_CR		0x0	/* watchdog control register */
+#define LTQ_WDT_SR		0x8	/* watchdog status register */
+
+#define LTQ_WDT_SR_EN		(0x1 << 31)	/* enable bit */
+#define LTQ_WDT_SR_PWD		(0x3 << 26)	/* turn on power */
+#define LTQ_WDT_SR_CLKDIV	(0x3 << 24)	/* turn on clock and set */
+						/* divider to 0x40000 */
+#define LTQ_WDT_DIVIDER		0x40000
+#define LTQ_MAX_TIMEOUT		((1 << 16) - 1)	/* the reload field is 16 bit */
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+
+static void __iomem *ltq_wdt_membase;
+static unsigned long ltq_io_region_clk_rate;
+
+static unsigned long ltq_wdt_bootstatus;
+static unsigned long ltq_wdt_in_use;
+static int ltq_wdt_timeout = 30;
+static int ltq_wdt_ok_to_close;
+
+static void
+ltq_wdt_enable(void)
+{
+	/* scale into a local: every ping lands here, keep ltq_wdt_timeout */
+	unsigned long int timeout = ltq_wdt_timeout *
+			(ltq_io_region_clk_rate / LTQ_WDT_DIVIDER) + 0x1000;
+	if (timeout > LTQ_MAX_TIMEOUT)
+		timeout = LTQ_MAX_TIMEOUT;
+	/* write the first password magic */
+	ltq_w32(LTQ_WDT_PW1, ltq_wdt_membase + LTQ_WDT_CR);
+	/* write the second magic plus the configuration and new timeout */
+	ltq_w32(LTQ_WDT_SR_EN | LTQ_WDT_SR_PWD | LTQ_WDT_SR_CLKDIV |
+		LTQ_WDT_PW2 | timeout, ltq_wdt_membase + LTQ_WDT_CR);
+}
+
+static void
+ltq_wdt_disable(void)
+{
+	/* write the first password magic */
+	ltq_w32(LTQ_WDT_PW1, ltq_wdt_membase + LTQ_WDT_CR);
+	/* write the second password magic with no config
+	 * this turns the watchdog off
+	 */
+	ltq_w32(LTQ_WDT_PW2, ltq_wdt_membase + LTQ_WDT_CR);
+}
+
+static ssize_t
+ltq_wdt_write(struct file *file, const char __user *data,
+		size_t len, loff_t *ppos)
+{
+	if (len) {
+		if (!nowayout) {
+			size_t i;
+
+			ltq_wdt_ok_to_close = 0;
+			for (i = 0; i != len; i++) {
+				char c;
+
+				if (get_user(c, data + i))
+					return -EFAULT;
+				if (c == 'V')
+					ltq_wdt_ok_to_close = 1;
+				else
+					ltq_wdt_ok_to_close = 0;
+			}
+		}
+		ltq_wdt_enable();
+	}
+
+	return len;
+}
+
+static struct watchdog_info ident = {
+	.options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+			WDIOF_CARDRESET,
+	.identity = "ltq_wdt",
+};
+
+static long
+ltq_wdt_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	int ret = -ENOTTY;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user((struct watchdog_info __user *)arg, &ident,
+				sizeof(ident)) ? -EFAULT : 0;
+		break;
+
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(ltq_wdt_bootstatus, (int __user *)arg);
+		break;
+
+	case WDIOC_GETSTATUS:
+		ret = put_user(0, (int __user *)arg);
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(ltq_wdt_timeout, (int __user *)arg);
+		if (ret)
+			break;	/* report the fault, do not fall through */
+		ltq_wdt_enable();	/* intentional drop through */
+	case WDIOC_GETTIMEOUT:
+		ret = put_user(ltq_wdt_timeout, (int __user *)arg);
+		break;
+
+	case WDIOC_KEEPALIVE:
+		ltq_wdt_enable();
+		ret = 0;
+		break;
+	}
+	return ret;
+}
+
+static int
+ltq_wdt_open(struct inode *inode, struct file *file)
+{
+	if (test_and_set_bit(0, &ltq_wdt_in_use))
+		return -EBUSY;
+	ltq_wdt_in_use = 1;
+	ltq_wdt_enable();
+
+	return nonseekable_open(inode, file);
+}
+
+static int
+ltq_wdt_release(struct inode *inode, struct file *file)
+{
+	if (ltq_wdt_ok_to_close)
+		ltq_wdt_disable();
+	else
+		pr_err("ltq_wdt: watchdog closed without warning\n");
+	ltq_wdt_ok_to_close = 0;
+	clear_bit(0, &ltq_wdt_in_use);
+
+	return 0;
+}
+
+static const struct file_operations ltq_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.write		= ltq_wdt_write,
+	.unlocked_ioctl	= ltq_wdt_ioctl,
+	.open		= ltq_wdt_open,
+	.release	= ltq_wdt_release,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice ltq_wdt_miscdev = {
+	.minor	= WATCHDOG_MINOR,
+	.name	= "watchdog",
+	.fops	= &ltq_wdt_fops,
+};
+
+static int __init
+ltq_wdt_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct clk *clk;
+
+	if (!res) {
+		dev_err(&pdev->dev, "cannot obtain I/O memory region\n");
+		return -ENOENT;
+	}
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "cannot request I/O memory region\n");
+		return -EBUSY;
+	}
+	ltq_wdt_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_wdt_membase) {
+		dev_err(&pdev->dev, "cannot remap I/O memory region\n");
+		return -ENOMEM;
+	}
+
+	/* we do not need to enable the clock as it is always running */
+	clk = clk_get(&pdev->dev, "io");
+	WARN_ON(!clk);
+	ltq_io_region_clk_rate = clk_get_rate(clk);
+	clk_put(clk);
+
+	if (ltq_reset_cause() == LTQ_RST_CAUSE_WDTRST)
+		ltq_wdt_bootstatus = WDIOF_CARDRESET;
+
+	return misc_register(&ltq_wdt_miscdev);
+}
+
+static int __devexit
+ltq_wdt_remove(struct platform_device *pdev)
+{
+	misc_deregister(&ltq_wdt_miscdev);
+
+	/* devm mapping: drop it through devres so it is not unmapped twice */
+	if (ltq_wdt_membase)
+		devm_iounmap(&pdev->dev, ltq_wdt_membase);
+	return 0;
+}
+
+
+static struct platform_driver ltq_wdt_driver = {
+	.remove = __devexit_p(ltq_wdt_remove),
+	.driver = {
+		.name = "ltq_wdt",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init
+init_ltq_wdt(void)
+{
+	return platform_driver_probe(&ltq_wdt_driver, ltq_wdt_probe);
+}
+
+static void __exit
+exit_ltq_wdt(void)
+{
+	platform_driver_unregister(&ltq_wdt_driver);
+}
+
+module_init(init_ltq_wdt);
+module_exit(exit_ltq_wdt);
+
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started");
+
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_DESCRIPTION("Lantiq SoC Watchdog");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index 528bceb220fd..eed5436ffb51 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = {
 	.fops	= &mpc8xxx_wdt_fops,
 };
 
+static const struct of_device_id mpc8xxx_wdt_match[];
 static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev)
 {
 	int ret;
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct mpc8xxx_wdt_type *wdt_type;
 	u32 freq = fsl_get_sys_freq();
 	bool enabled;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	wdt_type = ofdev->dev.of_match->data;
+	wdt_type = match->data;
 
 	if (!freq || freq == -1)
 		return -EINVAL;
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index 5ec5ac1f7878..1479dc4d6129 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -66,6 +66,7 @@ static struct {
 	int default_ticks;
 	unsigned long inuse;
 	unsigned gpio;
+	int gstate;
 } mtx1_wdt_device;
 
 static void mtx1_wdt_trigger(unsigned long unused)
@@ -75,13 +76,13 @@ static void mtx1_wdt_trigger(unsigned long unused)
 	spin_lock(&mtx1_wdt_device.lock);
 	if (mtx1_wdt_device.running)
 		ticks--;
-	/*
-	 * toggle GPIO2_15
-	 */
-	tmp = au_readl(GPIO2_DIR);
-	tmp = (tmp & ~(1 << mtx1_wdt_device.gpio)) |
-	      ((~tmp) & (1 << mtx1_wdt_device.gpio));
-	au_writel(tmp, GPIO2_DIR);
+
+	/* toggle wdt gpio: '!' not '~', so gstate really alternates 0/1 */
+	mtx1_wdt_device.gstate = !mtx1_wdt_device.gstate;
+	if (mtx1_wdt_device.gstate)
+		gpio_direction_output(mtx1_wdt_device.gpio, 1);
+	else
+		gpio_direction_input(mtx1_wdt_device.gpio);
 
 	if (mtx1_wdt_device.queue && ticks)
 		mod_timer(&mtx1_wdt_device.timer, jiffies + MTX1_WDT_INTERVAL);
@@ -103,7 +104,8 @@ static void mtx1_wdt_start(void)
 	spin_lock_irqsave(&mtx1_wdt_device.lock, flags);
 	if (!mtx1_wdt_device.queue) {
 		mtx1_wdt_device.queue = 1;
-		gpio_set_value(mtx1_wdt_device.gpio, 1);
+		mtx1_wdt_device.gstate = 1;
+		gpio_direction_output(mtx1_wdt_device.gpio, 1);
 		mod_timer(&mtx1_wdt_device.timer, jiffies + MTX1_WDT_INTERVAL);
 	}
 	mtx1_wdt_device.running++;
@@ -117,7 +119,8 @@ static int mtx1_wdt_stop(void)
 	spin_lock_irqsave(&mtx1_wdt_device.lock, flags);
 	if (mtx1_wdt_device.queue) {
 		mtx1_wdt_device.queue = 0;
-		gpio_set_value(mtx1_wdt_device.gpio, 0);
+		mtx1_wdt_device.gstate = 0;
+		gpio_direction_output(mtx1_wdt_device.gpio, 0);
 	}
 	ticks = mtx1_wdt_device.default_ticks;
 	spin_unlock_irqrestore(&mtx1_wdt_device.lock, flags);
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index f420f1ff7f13..4781f806701d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,21 +4,21 @@ obj-y	+= xenbus/
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_features.o			:= $(nostackp)
 
-obj-$(CONFIG_BLOCK)		+= biomerge.o
-obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
-obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
-obj-$(CONFIG_XEN_BALLOON)	+= xen-balloon.o
-obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
-obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_BLOCK)			+= biomerge.o
+obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
+obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o
+obj-$(CONFIG_XEN_BALLOON)		+= xen-balloon.o
+obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
+obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
-obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
-obj-$(CONFIG_SWIOTLB_XEN)	+= swiotlb-xen.o
-obj-$(CONFIG_XEN_DOM0)		+= pci.o
+obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
+obj-$(CONFIG_XEN_DOM0)			+= pci.o
 
-xen-evtchn-y			:= evtchn.o
+xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
 
-xen-platform-pci-y		:= platform-pci.o
+xen-platform-pci-y			:= platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 043af8ad6b60..f54290baa3db 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -114,7 +114,6 @@ static void __balloon_append(struct page *page)
 	if (PageHighMem(page)) {
 		list_add_tail(&page->lru, &ballooned_pages);
 		balloon_stats.balloon_high++;
-		dec_totalhigh_pages();
 	} else {
 		list_add(&page->lru, &ballooned_pages);
 		balloon_stats.balloon_low++;
@@ -124,6 +123,8 @@ static void __balloon_append(struct page *page)
 static void balloon_append(struct page *page)
 {
 	__balloon_append(page);
+	if (PageHighMem(page))
+		dec_totalhigh_pages();
 	totalram_pages--;
 }
 
@@ -193,7 +194,7 @@ static enum bp_state update_schedule(enum bp_state state)
 	return BP_EAGAIN;
 }
 
-static unsigned long current_target(void)
+static long current_credit(void)
 {
 	unsigned long target = balloon_stats.target_pages;
 
@@ -202,7 +203,7 @@ static unsigned long current_target(void)
 		     balloon_stats.balloon_low +
 		     balloon_stats.balloon_high);
 
-	return target;
+	return target - balloon_stats.current_pages;
 }
 
 static enum bp_state increase_reservation(unsigned long nr_pages)
@@ -246,7 +247,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 		set_phys_to_machine(pfn, frame_list[i]);
 
 		/* Link back into the page tables if not highmem. */
-		if (!xen_hvm_domain() && pfn < max_low_pfn) {
+		if (xen_pv_domain() && !PageHighMem(page)) {
 			int ret;
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
@@ -293,7 +294,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 
 		scrub_page(page);
 
-		if (!xen_hvm_domain() && !PageHighMem(page)) {
+		if (xen_pv_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
 				__pte_ma(0), 0);
@@ -337,7 +338,7 @@ static void balloon_process(struct work_struct *work)
 	mutex_lock(&balloon_mutex);
 
 	do {
-		credit = current_target() - balloon_stats.current_pages;
+		credit = current_credit();
 
 		if (credit > 0)
 			state = increase_reservation(credit);
@@ -420,7 +421,7 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
 	}
 
 	/* The balloon may be too large now. Shrink it if needed. */
-	if (current_target() != balloon_stats.current_pages)
+	if (current_credit())
 		schedule_delayed_work(&balloon_worker, 0);
 
 	mutex_unlock(&balloon_mutex);
@@ -429,7 +430,7 @@ EXPORT_SYMBOL(free_xenballooned_pages);
 
 static int __init balloon_init(void)
 {
- 	unsigned long pfn, nr_pages, extra_pfn_end;
+	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
 	if (!xen_domain())
@@ -437,11 +438,7 @@ static int __init balloon_init(void)
 
 	pr_info("xen/balloon: Initialising balloon driver.\n");
 
- 	if (xen_pv_domain())
- 		nr_pages = xen_start_info->nr_pages;
- 	else
- 		nr_pages = max_pfn;
- 	balloon_stats.current_pages = min(nr_pages, max_pfn);
+	balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -466,7 +463,7 @@ static int __init balloon_init(void)
 	     pfn < extra_pfn_end;
 	     pfn++) {
 		page = pfn_to_page(pfn);
-		/* totalram_pages doesn't include the boot-time
+		/* totalram_pages and totalhigh_pages do not include the boot-time
 		   balloon extension, so don't subtract from it. */
 		__balloon_append(page);
 	}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 42d6c930cc87..3ff822b48145 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -101,6 +101,7 @@ struct irq_info
 			unsigned short gsi;
 			unsigned char vector;
 			unsigned char flags;
+			uint16_t domid;
 		} pirq;
 	} u;
 };
@@ -118,6 +119,8 @@ static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
 static struct irq_chip xen_dynamic_chip;
 static struct irq_chip xen_percpu_chip;
 static struct irq_chip xen_pirq_chip;
+static void enable_dynirq(struct irq_data *data);
+static void disable_dynirq(struct irq_data *data);
 
 /* Get info for IRQ */
 static struct irq_info *info_for_irq(unsigned irq)
@@ -184,6 +187,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
 				   unsigned short pirq,
 				   unsigned short gsi,
 				   unsigned short vector,
+				   uint16_t domid,
 				   unsigned char flags)
 {
 	struct irq_info *info = info_for_irq(irq);
@@ -193,6 +197,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
 	info->u.pirq.pirq = pirq;
 	info->u.pirq.gsi = gsi;
 	info->u.pirq.vector = vector;
+	info->u.pirq.domid = domid;
 	info->u.pirq.flags = flags;
 }
 
@@ -473,16 +478,6 @@ static void xen_free_irq(unsigned irq)
 	irq_free_desc(irq);
 }
 
-static void pirq_unmask_notify(int irq)
-{
-	struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
-
-	if (unlikely(pirq_needs_eoi(irq))) {
-		int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
-		WARN_ON(rc);
-	}
-}
-
 static void pirq_query_unmask(int irq)
 {
 	struct physdev_irq_status_query irq_status;
@@ -506,6 +501,29 @@ static bool probing_irq(int irq)
 	return desc && desc->action == NULL;
 }
 
+static void eoi_pirq(struct irq_data *data)
+{
+	int evtchn = evtchn_from_irq(data->irq);
+	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
+	int rc = 0;
+
+	irq_move_irq(data);
+
+	if (VALID_EVTCHN(evtchn))
+		clear_evtchn(evtchn);
+
+	if (pirq_needs_eoi(data->irq)) {
+		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+		WARN_ON(rc);
+	}
+}
+
+static void mask_ack_pirq(struct irq_data *data)
+{
+	disable_dynirq(data);
+	eoi_pirq(data);
+}
+
 static unsigned int __startup_pirq(unsigned int irq)
 {
 	struct evtchn_bind_pirq bind_pirq;
@@ -539,7 +557,7 @@ static unsigned int __startup_pirq(unsigned int irq)
 
 out:
 	unmask_evtchn(evtchn);
-	pirq_unmask_notify(irq);
+	eoi_pirq(irq_get_irq_data(irq));
 
 	return 0;
 }
@@ -579,18 +597,7 @@ static void enable_pirq(struct irq_data *data)
 
 static void disable_pirq(struct irq_data *data)
 {
-}
-
-static void ack_pirq(struct irq_data *data)
-{
-	int evtchn = evtchn_from_irq(data->irq);
-
-	irq_move_irq(data);
-
-	if (VALID_EVTCHN(evtchn)) {
-		mask_evtchn(evtchn);
-		clear_evtchn(evtchn);
-	}
+	disable_dynirq(data);
 }
 
 static int find_irq_by_gsi(unsigned gsi)
@@ -639,9 +646,6 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	if (irq < 0)
 		goto out;
 
-	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
-				      name);
-
 	irq_op.irq = irq;
 	irq_op.vector = 0;
 
@@ -655,9 +659,35 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 		goto out;
 	}
 
-	xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector,
+	xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
 			       shareable ? PIRQ_SHAREABLE : 0);
 
+	pirq_query_unmask(irq);
+	/* We try to use the handler with the appropriate semantic for the
+	 * type of interrupt: if the interrupt doesn't need an eoi
+	 * (pirq_needs_eoi returns false), we treat it like an edge
+	 * triggered interrupt so we use handle_edge_irq.
+	 * As a matter of fact this only happens when the corresponding
+	 * physical interrupt is edge triggered or an msi.
+	 *
+	 * On the other hand if the interrupt needs an eoi (pirq_needs_eoi
+	 * returns true) we treat it like a level triggered interrupt so we
+	 * use handle_fasteoi_irq like the native code does for this kind of
+	 * interrupts.
+	 * Depending on the Xen version, pirq_needs_eoi might return true
+	 * not only for level triggered interrupts but for edge triggered
+	 * interrupts too. In any case Xen always honors the eoi mechanism,
+	 * not injecting any more pirqs of the same kind if the first one
+	 * hasn't received an eoi yet. Therefore using the fasteoi handler
+	 * is the right choice either way.
+	 */
+	if (pirq_needs_eoi(irq))
+		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+				handle_fasteoi_irq, name);
+	else
+		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+				handle_edge_irq, name);
+
 out:
 	spin_unlock(&irq_mapping_update_lock);
 
@@ -680,7 +710,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
 }
 
 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			     int pirq, int vector, const char *name)
+			     int pirq, int vector, const char *name,
+			     domid_t domid)
 {
 	int irq, ret;
 
@@ -690,10 +721,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	if (irq == -1)
 		goto out;
 
-	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
-				      name);
+	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
+			name);
 
-	xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0);
+	xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
 	ret = irq_set_msi_desc(irq, msidesc);
 	if (ret < 0)
 		goto error_irq;
@@ -722,9 +753,16 @@ int xen_destroy_irq(int irq)
 
 	if (xen_initial_domain()) {
 		unmap_irq.pirq = info->u.pirq.pirq;
-		unmap_irq.domid = DOMID_SELF;
+		unmap_irq.domid = info->u.pirq.domid;
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
-		if (rc) {
+		/* If another domain quits without making the pci_disable_msix
+		 * call, the Xen hypervisor takes care of freeing the PIRQs
+		 * (free_domain_pirqs).
+		 */
+		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
+			printk(KERN_INFO "domain %d does not have %d anymore\n",
+				info->u.pirq.domid, info->u.pirq.pirq);
+		else if (rc) {
 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
 			goto out;
 		}
@@ -759,6 +797,12 @@ out:
 	return irq;
 }
 
+
+int xen_pirq_from_irq(unsigned irq)
+{
+	return pirq_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
 int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
@@ -773,7 +817,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 			goto out;
 
 		irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_fasteoi_irq, "event");
+					      handle_edge_irq, "event");
 
 		xen_irq_info_evtchn_init(irq, evtchn);
 	}
@@ -912,8 +956,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
 			      unsigned long irqflags,
 			      const char *devname, void *dev_id)
 {
-	unsigned int irq;
-	int retval;
+	int irq, retval;
 
 	irq = bind_evtchn_to_irq(evtchn);
 	if (irq < 0)
@@ -955,8 +998,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
 			    irq_handler_t handler,
 			    unsigned long irqflags, const char *devname, void *dev_id)
 {
-	unsigned int irq;
-	int retval;
+	int irq, retval;
 
 	irq = bind_virq_to_irq(virq, cpu);
 	if (irq < 0)
@@ -1181,9 +1223,6 @@ static void __xen_evtchn_do_upcall(void)
 				port = (word_idx * BITS_PER_LONG) + bit_idx;
 				irq = evtchn_to_irq[port];
 
-				mask_evtchn(port);
-				clear_evtchn(port);
-
 				if (irq != -1) {
 					desc = irq_to_desc(irq);
 					if (desc)
@@ -1339,10 +1378,16 @@ static void ack_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
 
-	irq_move_masked_irq(data);
+	irq_move_irq(data);
 
 	if (VALID_EVTCHN(evtchn))
-		unmask_evtchn(evtchn);
+		clear_evtchn(evtchn);
+}
+
+static void mask_ack_dynirq(struct irq_data *data)
+{
+	disable_dynirq(data);
+	ack_dynirq(data);
 }
 
 static int retrigger_dynirq(struct irq_data *data)
@@ -1504,6 +1549,18 @@ void xen_poll_irq(int irq)
 	xen_poll_irq_timeout(irq, 0 /* no timeout */);
 }
 
+/* Returns 0 if the IRQ line is shared with other guests or the query fails. */
+int xen_test_irq_shared(int irq)
+{
+	struct irq_info *info = info_for_irq(irq);
+	struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
+
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+		return 0;
+	return !(irq_status.flags & XENIRQSTAT_shared);
+}
+EXPORT_SYMBOL_GPL(xen_test_irq_shared);
+
 void xen_irq_resume(void)
 {
 	unsigned int cpu, evtchn;
@@ -1537,7 +1594,9 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
 	.irq_mask		= disable_dynirq,
 	.irq_unmask		= enable_dynirq,
 
-	.irq_eoi		= ack_dynirq,
+	.irq_ack		= ack_dynirq,
+	.irq_mask_ack		= mask_ack_dynirq,
+
 	.irq_set_affinity	= set_affinity_irq,
 	.irq_retrigger		= retrigger_dynirq,
 };
@@ -1547,14 +1606,15 @@ static struct irq_chip xen_pirq_chip __read_mostly = {
 
 	.irq_startup		= startup_pirq,
 	.irq_shutdown		= shutdown_pirq,
-
 	.irq_enable		= enable_pirq,
-	.irq_unmask		= enable_pirq,
-
 	.irq_disable		= disable_pirq,
-	.irq_mask		= disable_pirq,
 
-	.irq_ack		= ack_pirq,
+	.irq_mask		= disable_dynirq,
+	.irq_unmask		= enable_dynirq,
+
+	.irq_ack		= eoi_pirq,
+	.irq_eoi		= eoi_pirq,
+	.irq_mask_ack		= mask_ack_pirq,
 
 	.irq_set_affinity	= set_affinity_irq,
 
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index a7ffdfe19fc9..f6832f46aea4 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -427,6 +427,17 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 	return 0;
 }
 
+static void gntalloc_vma_open(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users++;
+	spin_unlock(&gref_lock);
+}
+
 static void gntalloc_vma_close(struct vm_area_struct *vma)
 {
 	struct gntalloc_gref *gref = vma->vm_private_data;
@@ -441,6 +452,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma)
 }
 
 static struct vm_operations_struct gntalloc_vmops = {
+	.open = gntalloc_vma_open,
 	.close = gntalloc_vma_close,
 };
 
@@ -471,8 +483,6 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 	vma->vm_private_data = gref;
 
 	vma->vm_flags |= VM_RESERVED;
-	vma->vm_flags |= VM_DONTCOPY;
-	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
 
 	vma->vm_ops = &gntalloc_vmops;
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index b0f9e8fb0052..f914b26cf0c2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -330,17 +330,26 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 
 /* ------------------------------------------------------------------ */
 
+static void gntdev_vma_open(struct vm_area_struct *vma)
+{
+	struct grant_map *map = vma->vm_private_data;
+
+	pr_debug("gntdev_vma_open %p\n", vma);
+	atomic_inc(&map->users);
+}
+
 static void gntdev_vma_close(struct vm_area_struct *vma)
 {
 	struct grant_map *map = vma->vm_private_data;
 
-	pr_debug("close %p\n", vma);
+	pr_debug("gntdev_vma_close %p\n", vma);
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
 	gntdev_put_map(map);
 }
 
 static struct vm_operations_struct gntdev_vmops = {
+	.open = gntdev_vma_open,
 	.close = gntdev_vma_close,
 };
 
@@ -652,7 +661,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	vma->vm_ops = &gntdev_vmops;
 
-	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
+	vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND;
+
+	if (use_ptemod)
+		vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP;
 
 	vma->vm_private_data = map;
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 3745a318defc..fd725cde6ad1 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		if (map_ops[i].status)
 			continue;
 
-		/* m2p override only supported for GNTMAP_contains_pte mappings */
-		if (!(map_ops[i].flags & GNTMAP_contains_pte))
-			continue;
-		pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+		if (map_ops[i].flags & GNTMAP_contains_pte) {
+			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
 				(map_ops[i].host_addr & ~PAGE_MASK));
-		mfn = pte_mfn(*pte);
-		ret = m2p_add_override(mfn, pages[i]);
+			mfn = pte_mfn(*pte);
+		} else {
+			/* If you really wanted to do this:
+			 * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+			 *
+			 * The reason we do not implement it is b/c on the
+			 * unmap path (gnttab_unmap_refs) we have no means of
+			 * checking whether the page is !GNTMAP_contains_pte.
+			 *
+			 * That is without some extra data-structure to carry
+			 * the struct page, bool clear_pte, and list_head next
+			 * tuples and deal with allocation/deallocation, etc.
+			 *
+			 * The users of this API set the GNTMAP_contains_pte
+			 * flag so let's just return not supported until it
+			 * becomes necessary to implement.
+			 */
+			return -EOPNOTSUPP;
+		}
+		ret = m2p_add_override(mfn, pages[i],
+				       map_ops[i].flags & GNTMAP_contains_pte);
 		if (ret)
 			return ret;
 	}
@@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i]);
+		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 95143dd6904d..0b5366b5be20 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -8,6 +8,7 @@
 #include <linux/sysrq.h>
 #include <linux/stop_machine.h>
 #include <linux/freezer.h>
+#include <linux/syscore_ops.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -61,7 +62,7 @@ static void xen_post_suspend(int cancelled)
 	xen_mm_unpin_all();
 }
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 static int xen_suspend(void *data)
 {
 	struct suspend_info *si = data;
@@ -69,9 +70,9 @@ static int xen_suspend(void *data)
 
 	BUG_ON(!irqs_disabled());
 
-	err = sysdev_suspend(PMSG_FREEZE);
+	err = syscore_suspend();
 	if (err) {
-		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
+		printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n",
 			err);
 		return err;
 	}
@@ -95,7 +96,7 @@ static int xen_suspend(void *data)
 		xen_timer_resume();
 	}
 
-	sysdev_resume();
+	syscore_resume();
 
 	return 0;
 }
@@ -173,7 +174,7 @@ out:
 #endif
 	shutting_down = SHUTDOWN_INVALID;
 }
-#endif	/* CONFIG_HIBERNATION */
+#endif	/* CONFIG_HIBERNATE_CALLBACKS */
 
 struct shutdown_handler {
 	const char *command;
@@ -202,7 +203,7 @@ static void shutdown_handler(struct xenbus_watch *watch,
 		{ "poweroff",	do_poweroff },
 		{ "halt",	do_poweroff },
 		{ "reboot",	do_reboot   },
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_HIBERNATE_CALLBACKS
 		{ "suspend",	do_suspend  },
 #endif
 		{NULL, NULL},
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 60f1827a32cb..1e0fe01eb670 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -215,7 +215,7 @@ static struct attribute_group xen_compilation_group = {
 	.attrs = xen_compile_attrs,
 };
 
-int __init static xen_compilation_init(void)
+static int __init xen_compilation_init(void)
 {
 	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
 }
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 0ee594569dcc..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
 
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
-	int err, flags;
+	int err;
 	struct p9_fid *fid;
-	struct v9fs_session_info *v9ses;
 
-	v9ses = v9fs_dentry2v9ses(dentry);
 	fid = v9fs_fid_clone_with_uid(dentry, 0);
 	if (IS_ERR(fid))
 		goto error_out;
@@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 	 * dirty pages. We always request for the open fid in read-write
 	 * mode so that a partial page write which result in page
 	 * read can work.
-	 *
-	 * we don't have a tsyncfs operation for older version
-	 * of protocol. So make sure the write back fid is
-	 * opened in O_SYNC mode.
 	 */
-	if (!v9fs_proto_dotl(v9ses))
-		flags = O_RDWR | O_SYNC;
-	else
-		flags = O_RDWR;
-
-	err = p9_client_open(fid, flags);
+	err = p9_client_open(fid, O_RDWR);
 	if (err < 0) {
 		p9_client_clunk(fid);
 		fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9665c2b840e6..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
 	struct list_head slist; /* list of sessions registered with v9fs */
 	struct backing_dev_info bdi;
 	struct rw_semaphore rename_sem;
-	struct p9_fid *root_fid; /* Used for file system sync */
 };
 
 /* cache_validity flags */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 			retval = v9fs_refresh_inode_dotl(fid, inode);
 		else
 			retval = v9fs_refresh_inode(fid, inode);
-		if (retval <= 0)
+		if (retval == -ENOENT)
+			return 0;
+		if (retval < 0)
 			return retval;
 	}
 out_valid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ffbb113d5f33..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid)) {
 		__putname(link);
-		link = ERR_PTR(PTR_ERR(fid));
+		link = ERR_CAST(fid);
 		goto ndset;
 	}
 	retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f3eed3383e4f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		retval = PTR_ERR(inode);
 		goto release_sb;
 	}
+
 	root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		p9stat_free(st);
 		kfree(st);
 	}
-	v9fs_fid_add(root, fid);
 	retval = v9fs_get_acl(inode, fid);
 	if (retval)
 		goto release_sb;
-	/*
-	 * Add the root fid to session info. This is used
-	 * for file system sync. We want a cloned fid here
-	 * so that we can do a sync_filesystem after a
-	 * shrink_dcache_for_umount
-	 */
-	v9ses->root_fid = v9fs_fid_clone(root);
-	if (IS_ERR(v9ses->root_fid)) {
-		retval = PTR_ERR(v9ses->root_fid);
-		goto release_sb;
-	}
+	v9fs_fid_add(root, fid);
 
 	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	return ERR_PTR(retval);
+
 release_sb:
 	/*
-	 * we will do the session_close and root dentry
-	 * release in the below call.
+	 * we will do the session_close and root dentry release
+	 * in the below call. But we need to clunk fid, because we haven't
+	 * attached the fid to dentry so it won't get clunked
+	 * automatically.
 	 */
+	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
 	return ERR_PTR(retval);
 }
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
 	P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
 	kill_anon_super(s);
-	p9_client_clunk(v9ses->root_fid);
+
 	v9fs_session_cancel(v9ses);
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
@@ -285,14 +279,6 @@ done:
 	return res;
 }
 
-static int v9fs_sync_fs(struct super_block *sb, int wait)
-{
-	struct v9fs_session_info *v9ses = sb->s_fs_info;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
-	return p9_client_sync_fs(v9ses->root_fid);
-}
-
 static int v9fs_drop_inode(struct inode *inode)
 {
 	struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
 	return 1;
 }
 
+static int v9fs_write_inode(struct inode *inode,
+			    struct writeback_control *wbc)
+{
+	int ret;
+	struct p9_wstat wstat;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send a blank wstat, the legacy-protocol sync request, to the
+	 * server irrespective of wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	v9fs_blank_wstat(&wstat);
+
+	ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
+static int v9fs_write_inode_dotl(struct inode *inode,
+				 struct writeback_control *wbc)
+{
+	int ret;
+	struct v9fs_inode *v9inode;
+	/*
+	 * send an fsync request to server irrespective of
+	 * wbc->sync_mode.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+	v9inode = V9FS_I(inode);
+	if (!v9inode->writeback_fid)
+		return 0;
+	ret = p9_client_fsync(v9inode->writeback_fid, 0);
+	if (ret < 0) {
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		return ret;
+	}
+	return 0;
+}
+
 static const struct super_operations v9fs_super_ops = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode,
 };
 
 static const struct super_operations v9fs_super_ops_dotl = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
-	.sync_fs = v9fs_sync_fs,
 	.statfs = v9fs_statfs,
 	.drop_inode = v9fs_drop_inode,
 	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
+	.write_inode = v9fs_write_inode_dotl,
 };
 
 struct file_system_type v9fs_fs_type = {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f34078d702d3..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack = bprm->p;
 
 #ifdef arch_randomize_brk
-	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
+	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
 		current->mm->brk = current->mm->start_brk =
 			arch_randomize_brk(current->mm);
+#ifdef CONFIG_COMPAT_BRK
+		current->brk_randomized = 1;
+#endif
+	}
 #endif
 
 	if (current->personality & MMAP_PAGE_ZERO) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5147bdd3b8e1..257b00e98428 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (!bdev->bd_part)
 				goto out_clear;
 
+			ret = 0;
 			if (disk->fops->open) {
 				ret = disk->fops->open(bdev, mode);
 				if (ret == -ERESTARTSYS) {
@@ -1118,9 +1119,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					put_disk(disk);
 					goto restart;
 				}
-				if (ret)
-					goto out_clear;
 			}
+			/*
+			 * If the device is invalidated, rescan partition
+			 * if open succeeded or failed with -ENOMEDIUM.
+			 * The latter is necessary to prevent ghost
+			 * partitions on a removed medium.
+			 */
+			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
+				rescan_partitions(disk, bdev);
+			if (ret)
+				goto out_clear;
+
 			if (!bdev->bd_openers) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
 				bdi = blk_get_backing_dev_info(bdev);
@@ -1128,8 +1138,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					bdi = &default_backing_dev_info;
 				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
 			}
-			if (bdev->bd_invalidated)
-				rescan_partitions(disk, bdev);
 		} else {
 			struct block_device *whole;
 			whole = bdget_disk(disk, 0);
@@ -1153,13 +1161,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		}
 	} else {
 		if (bdev->bd_contains == bdev) {
-			if (bdev->bd_disk->fops->open) {
+			ret = 0;
+			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
-				if (ret)
-					goto out_unlock_bdev;
-			}
-			if (bdev->bd_invalidated)
+			/* the same as first opener case, read comment there */
+			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
 				rescan_partitions(bdev->bd_disk, bdev);
+			if (ret)
+				goto out_unlock_bdev;
 		}
 		/* only one opener holds refs to the module and disk */
 		module_put(disk->fops->owner);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..44ea5b92e1ba 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,18 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
 	if (value) {
 		acl = posix_acl_from_xattr(value, size);
-		if (acl == NULL) {
-			value = NULL;
-			size = 0;
-		} else if (IS_ERR(acl)) {
+		if (IS_ERR(acl))
 			return PTR_ERR(acl);
+
+		if (acl) {
+			ret = posix_acl_valid(acl);
+			if (ret)
+				goto out;
 		}
 	}
 
 	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
 	posix_acl_release(acl);
 
 	return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b5725540..8f4b81de3ae2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -718,7 +718,7 @@ struct btrfs_space_info {
 	u64 total_bytes;	/* total bytes in the space,
 				   this doesn't take mirrors into account */
 	u64 bytes_used;		/* total bytes used,
-				   this does't take mirrors into account */
+				   this doesn't take mirrors into account */
 	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
 				   transaction finishes */
 	u64 bytes_reserved;	/* total bytes the allocator has reserved for
@@ -740,8 +740,10 @@ struct btrfs_space_info {
 	 */
 	unsigned long reservation_progress;
 
-	int full;		/* indicates that we cannot allocate any more
+	int full:1;		/* indicates that we cannot allocate any more
 				   chunks for this space */
+	int chunk_alloc:1;	/* set if we are allocating a chunk */
+
 	int force_alloc;	/* set if we need to force a chunk alloc for
 				   this space */
 
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8f1d44ba332f..228cf36ece83 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2824,6 +2824,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
 	spin_lock(&delayed_refs->lock);
 	if (delayed_refs->num_entries == 0) {
+		spin_unlock(&delayed_refs->lock);
 		printk(KERN_INFO "delayed_refs has NO entry\n");
 		return ret;
 	}
@@ -3057,7 +3058,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 		btrfs_destroy_pinned_extent(root,
 					    root->fs_info->pinned_extents);
 
-		t->use_count = 0;
+		atomic_set(&t->use_count, 0);
 		list_del_init(&t->list);
 		memset(t, 0, sizeof(*t));
 		kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..9ee6bd55e16c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+	CHUNK_ALLOC_NO_FORCE = 0,
+	CHUNK_ALLOC_FORCE = 1,
+	CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_readonly = 0;
 	found->bytes_may_use = 0;
 	found->full = 0;
-	found->force_alloc = 0;
+	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	found->chunk_alloc = 0;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
 	atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
 		if (!data_sinfo->full && alloc_chunk) {
 			u64 alloc_target;
 
-			data_sinfo->force_alloc = 1;
+			data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
 			spin_unlock(&data_sinfo->lock);
 alloc:
 			alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
 
 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 					     bytes + 2 * 1024 * 1024,
-					     alloc_target, 0);
+					     alloc_target,
+					     CHUNK_ALLOC_NO_FORCE);
 			btrfs_end_transaction(trans, root);
 			if (ret < 0) {
 				if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
 		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-			found->force_alloc = 1;
+			found->force_alloc = CHUNK_ALLOC_FORCE;
 	}
 	rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
+			      int force)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
 	u64 thresh;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+	if (force == CHUNK_ALLOC_FORCE)
+		return 1;
+
+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+		thresh = max_t(u64, 64 * 1024 * 1024,
+			       div_factor_fine(thresh, 1));
+
+		if (num_bytes - num_allocated < thresh)
+			return 1;
+	}
+
+	/*
+	 * we have two similar checks here, one based on percentage
+	 * and one based on a hard number of 256MB.  The idea
+	 * is that if we have a good amount of free
+	 * room, don't allocate a chunk.  A good amount is
+	 * less than 80% utilized of the chunks we have allocated,
+	 * or more than 256MB free
+	 */
+	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
 		return 0;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes < div_factor(num_bytes, 8))
+	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
 	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+	/* 256MB or 5% of the FS */
 	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
 	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
 		return 0;
-
 	return 1;
 }
 
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	int wait_for_alloc = 0;
 	int ret = 0;
 
-	mutex_lock(&fs_info->chunk_mutex);
-
 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
 	space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 	BUG_ON(!space_info);
 
+again:
 	spin_lock(&space_info->lock);
 	if (space_info->force_alloc)
-		force = 1;
+		force = space_info->force_alloc;
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
 	}
 
-	if (!force && !should_alloc_chunk(extent_root, space_info,
-					  alloc_bytes)) {
+	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		return 0;
+	} else if (space_info->chunk_alloc) {
+		wait_for_alloc = 1;
+	} else {
+		space_info->chunk_alloc = 1;
 	}
+
 	spin_unlock(&space_info->lock);
 
+	mutex_lock(&fs_info->chunk_mutex);
+
+	/*
+	 * The chunk_mutex is held throughout the entirety of a chunk
+	 * allocation, so once we've acquired the chunk_mutex we know that the
+	 * other guy is done and we need to recheck and see if we should
+	 * allocate.
+	 */
+	if (wait_for_alloc) {
+		mutex_unlock(&fs_info->chunk_mutex);
+		wait_for_alloc = 0;
+		goto again;
+	}
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		space_info->full = 1;
 	else
 		ret = 1;
-	space_info->force_alloc = 0;
+
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+	space_info->chunk_alloc = 0;
 	spin_unlock(&space_info->lock);
-out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
 }
@@ -5303,11 +5368,13 @@ loop:
 
 		if (allowed_chunk_alloc) {
 			ret = do_chunk_alloc(trans, root, num_bytes +
-					     2 * 1024 * 1024, data, 1);
+					     2 * 1024 * 1024, data,
+					     CHUNK_ALLOC_LIMITED);
 			allowed_chunk_alloc = 0;
 			done_chunk_alloc = 1;
-		} else if (!done_chunk_alloc) {
-			space_info->force_alloc = 1;
+		} else if (!done_chunk_alloc &&
+			   space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+			space_info->force_alloc = CHUNK_ALLOC_LIMITED;
 		}
 
 		if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
 	 */
 	if (empty_size || root->ref_cows)
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-				     num_bytes + 2 * 1024 * 1024, data, 0);
+				     num_bytes + 2 * 1024 * 1024, data,
+				     CHUNK_ALLOC_NO_FORCE);
 
 	WARN_ON(num_bytes < root->sectorsize);
 	ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
 		num_bytes = num_bytes & ~(root->sectorsize - 1);
 		num_bytes = max(num_bytes, min_alloc_size);
 		do_chunk_alloc(trans, root->fs_info->extent_root,
-			       num_bytes, data, 1);
+			       num_bytes, data, CHUNK_ALLOC_FORCE);
 		goto again;
 	}
 	if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -7991,6 +8059,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
 				u64 group_start = group->key.objectid;
 				new_extents = kmalloc(sizeof(*new_extents),
 						      GFP_NOFS);
+				if (!new_extents) {
+					ret = -ENOMEM;
+					goto out;
+				}
 				nr_extents = 1;
 				ret = get_new_locations(reloc_inode,
 							extent_key,
@@ -8109,13 +8181,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
 	alloc_flags = update_block_group_flags(root, cache->flags);
 	if (alloc_flags != cache->flags)
-		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			       CHUNK_ALLOC_FORCE);
 
 	ret = set_block_group_ro(cache);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			     CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
 	ret = set_block_group_ro(cache);
@@ -8128,7 +8202,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type)
 {
 	u64 alloc_flags = get_alloc_profile(root, type);
-	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+			      CHUNK_ALLOC_FORCE);
 }
 
 /*
@@ -8781,23 +8856,38 @@ out:
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_space_info *space_info;
+	struct btrfs_super_block *disk_super;
+	u64 features;
+	u64 flags;
+	int mixed = 0;
 	int ret;
 
-	ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
-								 &space_info);
-	if (ret)
-		return ret;
+	disk_super = &fs_info->super_copy;
+	if (!btrfs_super_root(disk_super))
+		return 1;
 
-	ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
-								 &space_info);
-	if (ret)
-		return ret;
+	features = btrfs_super_incompat_flags(disk_super);
+	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+		mixed = 1;
 
-	ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
-								 &space_info);
+	flags = BTRFS_BLOCK_GROUP_SYSTEM;
+	ret = update_space_info(fs_info, flags, 0, 0, &space_info);
 	if (ret)
-		return ret;
+		goto out;
 
+	if (mixed) {
+		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
+		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+	} else {
+		flags = BTRFS_BLOCK_GROUP_METADATA;
+		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+		if (ret)
+			goto out;
+
+		flags = BTRFS_BLOCK_GROUP_DATA;
+		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+	}
+out:
 	return ret;
 }
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -690,6 +691,15 @@ static void cache_state(struct extent_state *state,
 	}
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+	if (cached_ptr && (*cached_ptr)) {
+		struct extent_state *state = *cached_ptr;
+		*cached_ptr = NULL;
+		free_extent_state(state);
+	}
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +950,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask)
+			struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+			      NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1022,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 				mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
 				mask);
@@ -1735,6 +1744,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
 	do {
 		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
+		struct extent_state *state;
+
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1761,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
 
+		spin_lock(&tree->lock);
+		state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+		if (state && state->start == start) {
+			/*
+			 * take a reference on the state, unlock will drop
+			 * the ref
+			 */
+			cache_state(state, &cached);
+		}
+		spin_unlock(&tree->lock);
+
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      NULL);
+							      state);
 			if (ret)
 				uptodate = 0;
 		}
@@ -1764,15 +1787,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
 				if (err)
 					uptodate = 0;
+				uncache_state(&cached);
 				continue;
 			}
 		}
 
 		if (uptodate) {
-			set_extent_uptodate(tree, start, end,
+			set_extent_uptodate(tree, start, end, &cached,
 					    GFP_ATOMIC);
 		}
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
 		if (whole_page) {
 			if (uptodate) {
@@ -1811,6 +1835,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 
 	do {
 		struct page *page = bvec->bv_page;
+		struct extent_state *cached = NULL;
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1846,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 			prefetchw(&bvec->bv_page->flags);
 
 		if (uptodate) {
-			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+			set_extent_uptodate(tree, start, end, &cached,
+					    GFP_ATOMIC);
 		} else {
 			ClearPageUptodate(page);
 			SetPageError(page);
 		}
 
-		unlock_extent(tree, start, end, GFP_ATOMIC);
+		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
 	} while (bvec >= bio->bi_io_vec);
 
@@ -2016,14 +2042,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			char *userpage;
+			struct extent_state *cached = NULL;
+
 			iosize = PAGE_CACHE_SIZE - page_offset;
 			userpage = kmap_atomic(page, KM_USER0);
 			memset(userpage + page_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			break;
 		}
 		em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2092,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
 			char *userpage;
+			struct extent_state *cached = NULL;
+
 			userpage = kmap_atomic(page, KM_USER0);
 			memset(userpage + page_offset, 0, iosize);
 			flush_dcache_page(page);
 			kunmap_atomic(userpage, KM_USER0);
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+					    &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur, cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			cur = cur + iosize;
 			page_offset += iosize;
 			continue;
@@ -2650,7 +2682,7 @@ int extent_readpages(struct extent_io_tree *tree,
 		prefetchw(&page->flags);
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
-					page->index, GFP_KERNEL)) {
+					page->index, GFP_NOFS)) {
 			__extent_read_full_page(tree, page, get_extent,
 						&bio, 0, &bio_flags);
 		}
@@ -2789,9 +2821,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
 			iocount++;
 			block_start = block_start + iosize;
 		} else {
-			set_extent_uptodate(tree, block_start, cur_end,
+			struct extent_state *cached = NULL;
+
+			set_extent_uptodate(tree, block_start, cur_end, &cached,
 					    GFP_NOFS);
-			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			unlock_extent_cached(tree, block_start, cur_end,
+					     &cached, GFP_NOFS);
 			block_start = cur_end + 1;
 		}
 		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3492,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 
 	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    GFP_NOFS);
+			    NULL, GFP_NOFS);
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3920,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
 	kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+	unsigned long distance = (src > dst) ? src - dst : dst - src;
+	return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
 		       unsigned long dst_off, unsigned long src_off,
 		       unsigned long len)
@@ -3892,10 +3933,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
 	char *src_kaddr;
 
-	if (dst_page != src_page)
+	if (dst_page != src_page) {
 		src_kaddr = kmap_atomic(src_page, KM_USER1);
-	else
+	} else {
 		src_kaddr = dst_kaddr;
+		BUG_ON(areas_overlap(src_off, dst_off, len));
+	}
 
 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
 	kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4013,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 		       "len %lu len %lu\n", dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
-	if (dst_offset < src_offset) {
+	if (!areas_overlap(src_offset, dst_offset, len)) {
 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
 		return;
 	}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   int bits, int exclusive_bits, u64 *failed_start,
 		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-			gfp_t mask);
+			struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 		   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea54a3fd..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
 	size_t i;
 	for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_root *root,
-					    struct file *file,
-					    struct page **pages,
-					    size_t num_pages,
-					    loff_t pos,
-					    size_t write_bytes)
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+		      struct page **pages, size_t num_pages,
+		      loff_t pos, size_t write_bytes,
+		      struct extent_state **cached)
 {
 	int err = 0;
 	int i;
-	struct inode *inode = fdentry(file)->d_inode;
 	u64 num_bytes;
 	u64 start_pos;
 	u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
 
 	end_of_last_block = start_pos + num_bytes - 1;
 	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-					NULL);
+					cached);
 	if (err)
 		return err;
 
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		}
 
 		if (copied > 0) {
-			ret = dirty_and_release_pages(root, file, pages,
-						      dirty_pages, pos,
-						      copied);
+			ret = btrfs_dirty_pages(root, inode, pages,
+						dirty_pages, pos, copied,
+						NULL);
 			if (ret) {
 				btrfs_delalloc_release_space(inode,
 					dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c953205b..63731a1fb0a1 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct inode *inode;
 	struct rb_node *node;
 	struct list_head *pos, *n;
+	struct page **pages;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
 	struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
-	pgoff_t index = 0, last_index = 0;
 	unsigned long first_page_offset;
-	int num_checksums;
+	int index = 0, num_pages = 0;
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
 	bool next_page = false;
+	bool out_of_space = false;
 
 	root = root->fs_info->tree_root;
 
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		return 0;
 	}
 
-	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+		PAGE_CACHE_SHIFT;
 	filemap_write_and_wait(inode->i_mapping);
 	btrfs_wait_ordered_range(inode, inode->i_size &
 				 ~(root->sectorsize - 1), (u64)-1);
 
 	/* We need a checksum per page. */
-	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-	crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
 	if (!crc) {
 		iput(inode);
 		return 0;
 	}
 
+	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+	if (!pages) {
+		kfree(crc);
+		iput(inode);
+		return 0;
+	}
+
 	/* Since the first page has all of our checksums and our generation we
 	 * need to calculate the offset into the page that we can start writing
 	 * our entries.
 	 */
-	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+	first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
 
 	/* Get the cluster for this block_group if it exists */
 	if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 * after find_get_page at this point.  Just putting this here so people
 	 * know and don't freak out.
 	 */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		page = grab_cache_page(inode->i_mapping, index);
 		if (!page) {
-			pgoff_t i = 0;
+			int i;
 
-			while (i < index) {
-				page = find_get_page(inode->i_mapping, i);
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(page);
-				i++;
+			for (i = 0; i < index; i++) { /* rest are NULL */
+				unlock_page(pages[i]);
+				page_cache_release(pages[i]);
 			}
 			goto out_free;
 		}
+		pages[index] = page;
 		index++;
 	}
 
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 			offset = start_offset;
 		}
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+
+		page = pages[index];
 
 		addr = kmap(page);
 		entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-
-		/*
-		 * We need to release our reference we got for grab_cache_page,
-		 * except for the first page which will hold our checksums, we
-		 * do that below.
-		 */
-		if (index != 0) {
-			unlock_page(page);
-			page_cache_release(page);
-		}
-
-		page_cache_release(page);
-
 		index++;
 	} while (node || next_page);
 
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		struct btrfs_free_space *entry =
 			list_entry(pos, struct btrfs_free_space, list);
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+		page = pages[index];
 
 		addr = kmap(page);
 		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		crc++;
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		list_del_init(&entry->list);
 		index++;
 	}
 
+	if (out_of_space) {
+		btrfs_drop_pages(pages, num_pages);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+				     i_size_read(inode) - 1, &cached_state,
+				     GFP_NOFS);
+		ret = 0;
+		goto out_free;
+	}
+
 	/* Zero out the rest of the pages just to make sure */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		void *addr;
 
-		page = find_get_page(inode->i_mapping, index);
-
+		page = pages[index];
 		addr = kmap(page);
 		memset(addr, 0, PAGE_CACHE_SIZE);
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		bytes += PAGE_CACHE_SIZE;
 		index++;
 	}
 
-	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
-
 	/* Write the checksums and trans id to the first page */
 	{
 		void *addr;
 		u64 *gen;
 
-		page = find_get_page(inode->i_mapping, 0);
+		page = pages[0];
 
 		addr = kmap(page);
-		memcpy(addr, checksums, sizeof(u32) * num_checksums);
-		gen = addr + (sizeof(u32) * num_checksums);
+		memcpy(addr, checksums, sizeof(u32) * num_pages);
+		gen = addr + (sizeof(u32) * num_pages);
 		*gen = trans->transid;
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 	}
-	BTRFS_I(inode)->generation = trans->transid;
 
+	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+					    bytes, &cached_state);
+	btrfs_drop_pages(pages, num_pages);
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
+	if (ret) {
+		ret = 0;
+		goto out_free;
+	}
+
+	BTRFS_I(inode)->generation = trans->transid;
+
 	filemap_write_and_wait(inode->i_mapping);
 
 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
 		BTRFS_I(inode)->generation = 0;
 	}
 	kfree(checksums);
+	kfree(pages);
 	btrfs_update_inode(trans, root, inode);
 	iput(inode);
 	return ret;
@@ -1775,10 +1768,13 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
 
 	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
 		info = rb_entry(node, struct btrfs_free_space, offset_index);
-		unlink_free_space(block_group, info);
-		if (info->bitmap)
-			kfree(info->bitmap);
-		kmem_cache_free(btrfs_free_space_cachep, info);
+		if (!info->bitmap) {
+			unlink_free_space(block_group, info);
+			kmem_cache_free(btrfs_free_space_cachep, info);
+		} else {
+			free_bitmap(block_group, info);
+		}
+
 		if (need_resched()) {
 			spin_unlock(&block_group->tree_lock);
 			cond_resched();
@@ -2308,7 +2304,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
 			start = entry->offset;
 			bytes = min(entry->bytes, end - start);
 			unlink_free_space(block_group, entry);
-			kfree(entry);
+			kmem_cache_free(btrfs_free_space_cachep, entry);
 		}
 
 		spin_unlock(&block_group->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5cc64ab9c485..7cd8ab0ef04d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -954,6 +954,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 			 1, 0, NULL, GFP_NOFS);
 	while (start < end) {
 		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
+		BUG_ON(!async_cow);
 		async_cow->inode = inode;
 		async_cow->root = root;
 		async_cow->locked_page = locked_page;
@@ -1770,9 +1771,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
-	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-	ret = btrfs_update_inode(trans, root, inode);
-	BUG_ON(ret);
+	ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+	if (!ret) {
+		ret = btrfs_update_inode(trans, root, inode);
+		BUG_ON(ret);
+	}
+	ret = 0;
 out:
 	if (nolock) {
 		if (trans)
@@ -2590,6 +2594,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
+	if (!leaf->map_token)
+		map_private_extent_buffer(leaf, (unsigned long)item,
+					  sizeof(struct btrfs_inode_item),
+					  &leaf->map_token, &leaf->kaddr,
+					  &leaf->map_start, &leaf->map_len,
+					  KM_USER1);
+
 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
 	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2629,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
 	btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+
+	if (leaf->map_token) {
+		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+		leaf->map_token = NULL;
+	}
 }
 
 /*
@@ -4207,10 +4223,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	int ret;
-	u32 nritems;
 	struct extent_buffer *leaf;
 	int slot;
-	int advance;
 	unsigned char d_type;
 	int over = 0;
 	u32 di_cur;
@@ -4253,27 +4267,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
-	advance = 0;
 
 	while (1) {
 		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
 		slot = path->slots[0];
-		if (advance || slot >= nritems) {
-			if (slot >= nritems - 1) {
-				ret = btrfs_next_leaf(root, path);
-				if (ret)
-					break;
-				leaf = path->nodes[0];
-				nritems = btrfs_header_nritems(leaf);
-				slot = path->slots[0];
-			} else {
-				slot++;
-				path->slots[0]++;
-			}
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto err;
+			else if (ret > 0)
+				break;
+			continue;
 		}
 
-		advance = 1;
 		item = btrfs_item_nr(leaf, slot);
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -4282,7 +4288,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 		if (btrfs_key_type(&found_key) != key_type)
 			break;
 		if (found_key.offset < filp->f_pos)
-			continue;
+			goto next;
 
 		filp->f_pos = found_key.offset;
 
@@ -4335,6 +4341,8 @@ skip:
 			di_cur += di_len;
 			di = (struct btrfs_dir_item *)((char *)di + di_len);
 		}
+next:
+		path->slots[0]++;
 	}
 
 	/* Reached end of directory/root. Bump pos past the last item. */
@@ -4527,14 +4535,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BUG_ON(!path);
 
 	inode = new_inode(root->fs_info->sb);
-	if (!inode)
+	if (!inode) {
+		btrfs_free_path(path);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	if (dir) {
 		trace_btrfs_inode_request(dir);
 
 		ret = btrfs_set_inode_index(dir, index);
 		if (ret) {
+			btrfs_free_path(path);
 			iput(inode);
 			return ERR_PTR(ret);
 		}
@@ -4721,9 +4732,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err) {
@@ -4782,9 +4794,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err) {
@@ -4834,9 +4847,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (inode->i_nlink == ~0U)
 		return -EMLINK;
 
-	btrfs_inc_nlink(inode);
-	inode->i_ctime = CURRENT_TIME;
-
 	err = btrfs_set_inode_index(dir, &index);
 	if (err)
 		goto fail;
@@ -4852,6 +4862,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		goto fail;
 	}
 
+	btrfs_inc_nlink(inode);
+	inode->i_ctime = CURRENT_TIME;
+
 	btrfs_set_trans_block_group(trans, dir);
 	ihold(inode);
 
@@ -4989,6 +5002,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	inline_size = btrfs_file_extent_inline_item_len(leaf,
 					btrfs_item_nr(leaf, path->slots[0]));
 	tmp = kmalloc(inline_size, GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
 	ptr = btrfs_file_extent_inline_start(item);
 
 	read_extent_buffer(leaf, tmp, ptr, inline_size);
@@ -5221,7 +5236,7 @@ again:
 			btrfs_mark_buffer_dirty(leaf);
 		}
 		set_extent_uptodate(io_tree, em->start,
-				    extent_map_end(em) - 1, GFP_NOFS);
+				    extent_map_end(em) - 1, NULL, GFP_NOFS);
 		goto insert;
 	} else {
 		printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5443,30 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+						  struct extent_map *em,
 						  u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	struct extent_map *em;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct btrfs_key ins;
 	u64 alloc_hint;
 	int ret;
+	bool insert = false;
 
-	btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+	/*
+	 * If the extent map we looked up is a hole covering exactly the range
+	 * we want, reuse it rather than allocating a new one.  Otherwise free
+	 * it and drop the cache for our range so that a fresh one can be
+	 * inserted below.
+	 */
+	if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
+	    em->len != len) {
+		free_extent_map(em);
+		em = NULL;
+		insert = true;
+		btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+	}
 
 	trans = btrfs_join_transaction(root, 0);
 	if (IS_ERR(trans))
@@ -5454,10 +5482,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 		goto out;
 	}
 
-	em = alloc_extent_map(GFP_NOFS);
 	if (!em) {
-		em = ERR_PTR(-ENOMEM);
-		goto out;
+		em = alloc_extent_map(GFP_NOFS);
+		if (!em) {
+			em = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 	}
 
 	em->start = start;
@@ -5467,9 +5497,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	em->block_start = ins.objectid;
 	em->block_len = ins.offset;
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
+
+	/*
+	 * We need to do this because if we're using the original em we searched
+	 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
+	 */
+	em->flags = 0;
 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
-	while (1) {
+	while (insert) {
 		write_lock(&em_tree->lock);
 		ret = add_extent_mapping(em_tree, em);
 		write_unlock(&em_tree->lock);
@@ -5687,8 +5723,7 @@ must_cow:
 	 * it above
 	 */
 	len = bh_result->b_size;
-	free_extent_map(em);
-	em = btrfs_new_extent_direct(inode, start, len);
+	em = btrfs_new_extent_direct(inode, em, start, len);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
 	len = min(len, em->len - (start - em->start));
@@ -5851,8 +5886,10 @@ again:
 	}
 
 	add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-	btrfs_ordered_update_i_size(inode, 0, ordered);
-	btrfs_update_inode(trans, root, inode);
+	ret = btrfs_ordered_update_i_size(inode, 0, ordered);
+	if (!ret)
+		btrfs_update_inode(trans, root, inode);
+	ret = 0;
 out_unlock:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
 			     ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5975,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 					 int rw, u64 file_offset, int skip_sum,
-					 u32 *csums)
+					 u32 *csums, int async_submit)
 {
 	int write = rw & REQ_WRITE;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5986,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	if (ret)
 		goto err;
 
-	if (write && !skip_sum) {
+	if (skip_sum)
+		goto map;
+
+	if (write && async_submit) {
 		ret = btrfs_wq_submit_bio(root->fs_info,
 				   inode, rw, bio, 0, 0,
 				   file_offset,
 				   __btrfs_submit_bio_start_direct_io,
 				   __btrfs_submit_bio_done);
 		goto err;
+	} else if (write) {
+		/*
+		 * If we aren't doing async submit, calculate the csum of the
+		 * bio now.
+		 */
+		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
+		if (ret)
+			goto err;
 	} else if (!skip_sum) {
 		ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
 					  file_offset, csums);
@@ -5963,7 +6011,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 			goto err;
 	}
 
-	ret = btrfs_map_bio(root, rw, bio, 0, 1);
+map:
+	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
 	bio_put(bio);
 	return ret;
@@ -5985,23 +6034,30 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	int nr_pages = 0;
 	u32 *csums = dip->csums;
 	int ret = 0;
+	int async_submit = 0;
 	int write = rw & REQ_WRITE;
 
-	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-	if (!bio)
-		return -ENOMEM;
-	bio->bi_private = dip;
-	bio->bi_end_io = btrfs_end_dio_bio;
-	atomic_inc(&dip->pending_bios);
-
 	map_length = orig_bio->bi_size;
 	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
 			      &map_length, NULL, 0);
 	if (ret) {
-		bio_put(bio);
+		bio_put(orig_bio);
 		return -EIO;
 	}
 
+	if (map_length >= orig_bio->bi_size) {
+		bio = orig_bio;
+		goto submit;
+	}
+
+	async_submit = 1;
+	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_private = dip;
+	bio->bi_end_io = btrfs_end_dio_bio;
+	atomic_inc(&dip->pending_bios);
+
 	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
 		if (unlikely(map_length < submit_len + bvec->bv_len ||
 		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6071,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 			atomic_inc(&dip->pending_bios);
 			ret = __btrfs_submit_dio_bio(bio, inode, rw,
 						     file_offset, skip_sum,
-						     csums);
+						     csums, async_submit);
 			if (ret) {
 				bio_put(bio);
 				atomic_dec(&dip->pending_bios);
@@ -6052,8 +6108,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 		}
 	}
 
+submit:
 	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-				     csums);
+				     csums, async_submit);
 	if (!ret)
 		return 0;
 
@@ -6148,6 +6205,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 			unsigned long nr_segs)
 {
 	int seg;
+	int i;
 	size_t size;
 	unsigned long addr;
 	unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6220,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+		if ((addr & blocksize_mask) || (size & blocksize_mask))
 			goto out;
+
+		/* If this is a write we don't need to check anymore */
+		if (rw & WRITE)
+			continue;
+
+		/*
+		 * Check to make sure we don't have duplicate iov_base's in this
+		 * iovec, if so return EINVAL, otherwise we'll get csum errors
+		 * when reading back.
+		 */
+		for (i = seg + 1; i < nr_segs; i++) {
+			if (iov[seg].iov_base == iov[i].iov_base)
+				goto out;
+		}
 	}
 	retval = 0;
 out:
@@ -7206,9 +7278,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
 				&index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfc264fefdb0..2616f7ed4799 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
 		iflags |= FS_NOATIME_FL;
 	if (flags & BTRFS_INODE_DIRSYNC)
 		iflags |= FS_DIRSYNC_FL;
+	if (flags & BTRFS_INODE_NODATACOW)
+		iflags |= FS_NOCOW_FL;
+
+	if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
+		iflags |= FS_COMPR_FL;
+	else if (flags & BTRFS_INODE_NOCOMPRESS)
+		iflags |= FS_NOCOMP_FL;
 
 	return iflags;
 }
@@ -144,16 +151,13 @@ static int check_flags(unsigned int flags)
 	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 		      FS_NOATIME_FL | FS_NODUMP_FL | \
 		      FS_SYNC_FL | FS_DIRSYNC_FL | \
-		      FS_NOCOMP_FL | FS_COMPR_FL | \
-		      FS_NOCOW_FL | FS_COW_FL))
+		      FS_NOCOMP_FL | FS_COMPR_FL |
+		      FS_NOCOW_FL))
 		return -EOPNOTSUPP;
 
 	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
 		return -EINVAL;
 
-	if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
-		return -EINVAL;
-
 	return 0;
 }
 
@@ -218,6 +222,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 		ip->flags |= BTRFS_INODE_DIRSYNC;
 	else
 		ip->flags &= ~BTRFS_INODE_DIRSYNC;
+	if (flags & FS_NOCOW_FL)
+		ip->flags |= BTRFS_INODE_NODATACOW;
+	else
+		ip->flags &= ~BTRFS_INODE_NODATACOW;
 
 	/*
 	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +238,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 	} else if (flags & FS_COMPR_FL) {
 		ip->flags |= BTRFS_INODE_COMPRESS;
 		ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+	} else {
+		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
 	}
-	if (flags & FS_NOCOW_FL)
-		ip->flags |= BTRFS_INODE_NODATACOW;
-	else if (flags & FS_COW_FL)
-		ip->flags &= ~BTRFS_INODE_NODATACOW;
 
 	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(IS_ERR(trans));
@@ -2287,7 +2293,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 	struct btrfs_ioctl_space_info space;
 	struct btrfs_ioctl_space_info *dest;
 	struct btrfs_ioctl_space_info *dest_orig;
-	struct btrfs_ioctl_space_info *user_dest;
+	struct btrfs_ioctl_space_info __user *user_dest;
 	struct btrfs_space_info *info;
 	u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
 		       BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 58e7de9cc90c..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
 	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
 	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
 	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-	Opt_enospc_debug, Opt_err,
+	Opt_enospc_debug, Opt_subvolrootid, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
 	{Opt_clear_cache, "clear_cache"},
 	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
 	{Opt_enospc_debug, "enospc_debug"},
+	{Opt_subvolrootid, "subvolrootid=%d"},
 	{Opt_err, NULL},
 };
 
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			break;
 		case Opt_subvol:
 		case Opt_subvolid:
+		case Opt_subvolrootid:
 		case Opt_device:
 			/*
 			 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
 		void *holder, char **subvol_name, u64 *subvol_objectid,
-		struct btrfs_fs_devices **fs_devices)
+		u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 					*subvol_objectid = intarg;
 			}
 			break;
+		case Opt_subvolrootid:
+			intarg = 0;
+			error = match_int(&args[0], &intarg);
+			if (!error) {
+				/* we want the original fs_tree */
+				if (!intarg)
+					*subvol_rootid =
+						BTRFS_FS_TREE_OBJECTID;
+				else
+					*subvol_rootid = intarg;
+			}
+			break;
 		case Opt_device:
 			error = btrfs_scan_one_device(match_strdup(&args[0]),
 					flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	fmode_t mode = FMODE_READ;
 	char *subvol_name = NULL;
 	u64 subvol_objectid = 0;
+	u64 subvol_rootid = 0;
 	int error = 0;
 
 	if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
 	error = btrfs_parse_early_options(data, mode, fs_type,
 					  &subvol_name, &subvol_objectid,
-					  &fs_devices);
+					  &subvol_rootid, &fs_devices);
 	if (error)
 		return ERR_PTR(error);
 
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 		s->s_flags |= MS_ACTIVE;
 	}
 
-	root = get_default_root(s, subvol_objectid);
-	if (IS_ERR(root)) {
-		error = PTR_ERR(root);
-		deactivate_locked_super(s);
-		goto error_free_subvol_name;
-	}
 	/* if they gave us a subvolume name bind mount into that */
 	if (strcmp(subvol_name, ".")) {
 		struct dentry *new_root;
+
+		root = get_default_root(s, subvol_rootid);
+		if (IS_ERR(root)) {
+			error = PTR_ERR(root);
+			deactivate_locked_super(s);
+			goto error_free_subvol_name;
+		}
+
 		mutex_lock(&root->d_inode->i_mutex);
 		new_root = lookup_one_len(subvol_name, root,
 				      strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 		}
 		dput(root);
 		root = new_root;
+	} else {
+		root = get_default_root(s, subvol_objectid);
+		if (IS_ERR(root)) {
+			error = PTR_ERR(root);
+			deactivate_locked_super(s);
+			goto error_free_subvol_name;
+		}
 	}
 
 	kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da7e0bb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
 
 static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
-	WARN_ON(transaction->use_count == 0);
-	transaction->use_count--;
-	if (transaction->use_count == 0) {
-		list_del_init(&transaction->list);
+	WARN_ON(atomic_read(&transaction->use_count) == 0);
+	if (atomic_dec_and_test(&transaction->use_count)) {
 		memset(transaction, 0, sizeof(*transaction));
 		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
 		if (!cur_trans)
 			return -ENOMEM;
 		root->fs_info->generation++;
-		cur_trans->num_writers = 1;
+		atomic_set(&cur_trans->num_writers, 1);
 		cur_trans->num_joined = 0;
 		cur_trans->transid = root->fs_info->generation;
 		init_waitqueue_head(&cur_trans->writer_wait);
 		init_waitqueue_head(&cur_trans->commit_wait);
 		cur_trans->in_commit = 0;
 		cur_trans->blocked = 0;
-		cur_trans->use_count = 1;
+		atomic_set(&cur_trans->use_count, 1);
 		cur_trans->commit_done = 0;
 		cur_trans->start_time = get_seconds();
 
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
 		root->fs_info->running_transaction = cur_trans;
 		spin_unlock(&root->fs_info->new_trans_lock);
 	} else {
-		cur_trans->num_writers++;
+		atomic_inc(&cur_trans->num_writers);
 		cur_trans->num_joined++;
 	}
 
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
 	cur_trans = root->fs_info->running_transaction;
 	if (cur_trans && cur_trans->blocked) {
 		DEFINE_WAIT(wait);
-		cur_trans->use_count++;
+		atomic_inc(&cur_trans->use_count);
 		while (1) {
 			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
 					TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
+	int retries = 0;
 	int ret;
 
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
 	}
 
 	cur_trans = root->fs_info->running_transaction;
-	cur_trans->use_count++;
+	atomic_inc(&cur_trans->use_count);
 	if (type != TRANS_JOIN_NOLOCK)
 		mutex_unlock(&root->fs_info->trans_mutex);
 
@@ -224,10 +223,18 @@ again:
 
 	if (num_items > 0) {
 		ret = btrfs_trans_reserve_metadata(h, root, num_items);
-		if (ret == -EAGAIN) {
+		if (ret == -EAGAIN && !retries) {
+			retries++;
 			btrfs_commit_transaction(h, root);
 			goto again;
+		} else if (ret == -EAGAIN) {
+			/*
+			 * We already committed and retried once and got EAGAIN
+			 * again, so we really are out of space: return -ENOSPC.
+			 */
+			ret = -ENOSPC;
 		}
+
 		if (ret < 0) {
 			btrfs_end_transaction(h, root);
 			return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 			goto out_unlock;  /* nothing committing|committed */
 	}
 
-	cur_trans->use_count++;
+	atomic_inc(&cur_trans->use_count);
 	mutex_unlock(&root->fs_info->trans_mutex);
 
 	wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			wake_up_process(info->transaction_kthread);
 	}
 
-	if (lock)
-		mutex_lock(&info->trans_mutex);
 	WARN_ON(cur_trans != info->running_transaction);
-	WARN_ON(cur_trans->num_writers < 1);
-	cur_trans->num_writers--;
+	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
+	atomic_dec(&cur_trans->num_writers);
 
 	smp_mb();
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
 	put_transaction(cur_trans);
-	if (lock)
-		mutex_unlock(&info->trans_mutex);
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 	/* take transaction reference */
 	mutex_lock(&root->fs_info->trans_mutex);
 	cur_trans = trans->transaction;
-	cur_trans->use_count++;
+	atomic_inc(&cur_trans->use_count);
 	mutex_unlock(&root->fs_info->trans_mutex);
 
 	btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	mutex_lock(&root->fs_info->trans_mutex);
 	if (cur_trans->in_commit) {
-		cur_trans->use_count++;
+		atomic_inc(&cur_trans->use_count);
 		mutex_unlock(&root->fs_info->trans_mutex);
 		btrfs_end_transaction(trans, root);
 
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 		prev_trans = list_entry(cur_trans->list.prev,
 					struct btrfs_transaction, list);
 		if (!prev_trans->commit_done) {
-			prev_trans->use_count++;
+			atomic_inc(&prev_trans->use_count);
 			mutex_unlock(&root->fs_info->trans_mutex);
 
 			wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 				TASK_UNINTERRUPTIBLE);
 
 		smp_mb();
-		if (cur_trans->num_writers > 1)
+		if (atomic_read(&cur_trans->num_writers) > 1)
 			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 		else if (should_grow)
 			schedule_timeout(1);
 
 		mutex_lock(&root->fs_info->trans_mutex);
 		finish_wait(&cur_trans->writer_wait, &wait);
-	} while (cur_trans->num_writers > 1 ||
+	} while (atomic_read(&cur_trans->num_writers) > 1 ||
 		 (should_grow && cur_trans->num_joined != joined));
 
 	ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	wake_up(&cur_trans->commit_wait);
 
+	list_del_init(&cur_trans->list);
 	put_transaction(cur_trans);
 	put_transaction(cur_trans);
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
 	 * total writers in this transaction, it must be zero before the
 	 * transaction can end
 	 */
-	unsigned long num_writers;
+	atomic_t num_writers;
 
 	unsigned long num_joined;
 	int in_commit;
-	int use_count;
+	atomic_t use_count;
 	int commit_done;
 	int blocked;
 	struct list_head list;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c50271ad3157..f997ec0c1ba4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2209,8 +2209,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 
 	log = root->log_root;
 	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
 
 	di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
 				   name, name_len, -1);
@@ -2271,6 +2273,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 	}
 fail:
 	btrfs_free_path(path);
+out_unlock:
 	mutex_unlock(&BTRFS_I(dir)->log_mutex);
 	if (ret == -ENOSPC) {
 		root->fs_info->last_trans_log_full_commit = trans->transid;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 309a57b9fc85..c7367ae5a3e6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -155,6 +155,15 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	unsigned long limit;
 	unsigned long last_waited = 0;
 	int force_reg = 0;
+	struct blk_plug plug;
+
+	/*
+	 * This function runs all the bios we've collected for
+	 * a particular device.  We don't want to wander off to
+	 * another device without first sending all of these down.
+	 * So, set up a plug here and finish it off before we return.
+	 */
+	blk_start_plug(&plug);
 
 	bdi = blk_get_backing_dev_info(device->bdev);
 	fs_info = device->dev_root->fs_info;
@@ -294,6 +303,7 @@ loop_lock:
 	spin_unlock(&device->io_lock);
 
 done:
+	blk_finish_plug(&plug);
 	return 0;
 }
 
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_dir_item *di;
-	int ret = 0, slot, advance;
+	int ret = 0, slot;
 	size_t total_size = 0, size_left = size;
 	unsigned long name_ptr;
 	size_t name_len;
-	u32 nritems;
 
 	/*
 	 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
-	advance = 0;
+
 	while (1) {
 		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
 		slot = path->slots[0];
 
 		/* this is where we start walking through the path */
-		if (advance || slot >= nritems) {
+		if (slot >= btrfs_header_nritems(leaf)) {
 			/*
 			 * if we've reached the last slot in this leaf we need
 			 * to go to the next leaf and reset everything
 			 */
-			if (slot >= nritems-1) {
-				ret = btrfs_next_leaf(root, path);
-				if (ret)
-					break;
-				leaf = path->nodes[0];
-				nritems = btrfs_header_nritems(leaf);
-				slot = path->slots[0];
-			} else {
-				/*
-				 * just walking through the slots on this leaf
-				 */
-				slot++;
-				path->slots[0]++;
-			}
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto err;
+			else if (ret > 0)
+				break;
+			continue;
 		}
-		advance = 1;
 
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 		/* we are just looking for how big our buffer needs to be */
 		if (!size)
-			continue;
+			goto next;
 
 		if (!buffer || (name_len + 1) > size_left) {
 			ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 		size_left -= name_len + 1;
 		buffer += name_len + 1;
+next:
+		path->slots[0]++;
 	}
 	ret = total_size;
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e159c529fd2b..38b8ab554924 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -775,6 +775,13 @@ get_more_pages:
 					    ci->i_truncate_seq,
 					    ci->i_truncate_size,
 					    &inode->i_mtime, true, 1, 0);
+
+				if (!req) {
+					rc = -ENOMEM;
+					unlock_page(page);
+					break;
+				}
+
 				max_pages = req->r_num_pages;
 
 				alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5323c330bbf3..2a5404c1c42f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -819,7 +819,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 		used |= CEPH_CAP_FILE_CACHE;
 	if (ci->i_wr_ref)
 		used |= CEPH_CAP_FILE_WR;
-	if (ci->i_wrbuffer_ref)
+	if (ci->i_wb_ref || ci->i_wrbuffer_ref)
 		used |= CEPH_CAP_FILE_BUFFER;
 	return used;
 }
@@ -1331,10 +1331,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 }
 
 /*
- * Mark caps dirty.  If inode is newly dirty, add to the global dirty
- * list.
+ * Mark caps dirty.  If inode is newly dirty, return the dirty flags.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
  */
-void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
 	struct ceph_mds_client *mdsc =
 		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1358,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
 		spin_unlock(&mdsc->cap_dirty_lock);
 		if (ci->i_flushing_caps == 0) {
-			igrab(inode);
+			ihold(inode);
 			dirty |= I_DIRTY_SYNC;
 		}
 	}
@@ -1365,9 +1366,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
 	    (mask & CEPH_CAP_FILE_BUFFER))
 		dirty |= I_DIRTY_DATASYNC;
-	if (dirty)
-		__mark_inode_dirty(inode, dirty);
 	__cap_delay_requeue(mdsc, ci);
+	return dirty;
 }
 
 /*
@@ -1990,11 +1990,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
 	if (got & CEPH_CAP_FILE_WR)
 		ci->i_wr_ref++;
 	if (got & CEPH_CAP_FILE_BUFFER) {
-		if (ci->i_wrbuffer_ref == 0)
-			igrab(&ci->vfs_inode);
-		ci->i_wrbuffer_ref++;
-		dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n",
-		     &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref);
+		if (ci->i_wb_ref == 0)
+			ihold(&ci->vfs_inode);
+		ci->i_wb_ref++;
+		dout("__take_cap_refs %p wb %d -> %d (?)\n",
+		     &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
 	}
 }
 
@@ -2169,12 +2169,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 		if (--ci->i_rdcache_ref == 0)
 			last++;
 	if (had & CEPH_CAP_FILE_BUFFER) {
-		if (--ci->i_wrbuffer_ref == 0) {
+		if (--ci->i_wb_ref == 0) {
 			last++;
 			put++;
 		}
-		dout("put_cap_refs %p wrbuffer %d -> %d (?)\n",
-		     inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref);
+		dout("put_cap_refs %p wb %d -> %d (?)\n",
+		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
 	}
 	if (had & CEPH_CAP_FILE_WR)
 		if (--ci->i_wr_ref == 0) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159b512d5a27..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -734,9 +734,12 @@ retry_snap:
 		}
 	}
 	if (ret >= 0) {
+		int dirty;
 		spin_lock(&inode->i_lock);
-		__ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 		spin_unlock(&inode->i_lock);
+		if (dirty)
+			__mark_inode_dirty(inode, dirty);
 	}
 
 out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b54c97da1c43..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_rd_ref = 0;
 	ci->i_rdcache_ref = 0;
 	ci->i_wr_ref = 0;
+	ci->i_wb_ref = 0;
 	ci->i_wrbuffer_ref = 0;
 	ci->i_wrbuffer_ref_head = 0;
 	ci->i_shared_gen = 0;
@@ -1567,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	int release = 0, dirtied = 0;
 	int mask = 0;
 	int err = 0;
+	int inode_dirty_flags = 0;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
@@ -1725,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
 
 	if (dirtied) {
-		__ceph_mark_dirty_caps(ci, dirtied);
+		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
 		inode->i_ctime = CURRENT_TIME;
 	}
 
 	release &= issued;
 	spin_unlock(&inode->i_lock);
 
+	if (inode_dirty_flags)
+		__mark_inode_dirty(inode, inode_dirty_flags);
+
 	if (mask) {
 		req->r_inode = igrab(inode);
 		req->r_inode_drop = release;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f60b07b0feb0..d0fae4ce9ba5 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3304,8 +3304,8 @@ static void con_put(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 
+	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
 	ceph_put_mds_session(s);
-	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
 }
 
 /*
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e86ec1155f8f..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
 		up_write(&mdsc->snap_rwsem);
 	} else {
 		spin_lock(&mdsc->snap_empty_lock);
-		list_add(&mdsc->snap_empty, &realm->empty_item);
+		list_add(&realm->empty_item, &mdsc->snap_empty);
 		spin_unlock(&mdsc->snap_empty_lock);
 	}
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 619fe719968f..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,7 +293,7 @@ struct ceph_inode_info {
 
 	/* held references to caps */
 	int i_pin_ref;
-	int i_rd_ref, i_rdcache_ref, i_wr_ref;
+	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
@@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
 {
 	return ci->i_dirty_caps | ci->i_flushing_caps;
 }
-extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
 
 extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
 extern int __ceph_caps_used(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
 	struct ceph_inode_xattr *xattr = NULL;
 	int issued;
 	int required_blob_size;
+	int dirty;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
@@ -763,11 +764,12 @@ retry:
 	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
 	err = __set_xattr(ci, newname, name_len, newval,
 			  val_len, 1, 1, 1, &xattr);
-	__ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 	ci->i_xattrs.dirty = true;
 	inode->i_ctime = CURRENT_TIME;
 	spin_unlock(&inode->i_lock);
-
+	if (dirty)
+		__mark_inode_dirty(inode, dirty);
 	return err;
 
 do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
 	struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 	int issued;
 	int err;
+	int dirty;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
 		goto do_sync;
 
 	err = __remove_xattr_by_name(ceph_inode(inode), name);
-	__ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 	ci->i_xattrs.dirty = true;
 	inode->i_ctime = CURRENT_TIME;
 
 	spin_unlock(&inode->i_lock);
-
+	if (dirty)
+		__mark_inode_dirty(inode, dirty);
 	return err;
 do_sync:
 	spin_unlock(&inode->i_lock);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
 	select CRYPTO_MD5
 	select CRYPTO_HMAC
 	select CRYPTO_ARC4
+	select CRYPTO_DES
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
 	    Allows to fetch CIFS/NTFS ACL from the server.  The DACL blob
 	    is handed over to the application/caller.
 
-config CIFS_EXPERIMENTAL
-	  bool "CIFS Experimental Features (EXPERIMENTAL)"
+config CIFS_SMB2
+	bool "SMB2 network file system support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && INET && BROKEN
+	select NLS
+	select KEYS
+	select FSCACHE
+	select DNS_RESOLVER
+
+	help
+	  This enables experimental support for the SMB2 (Server Message Block
+	  version 2) protocol. The SMB2 protocol is the successor to the
+	  popular CIFS and SMB network file sharing protocols. SMB2 is the
+	  native file sharing mechanism for recent versions of Windows
+	  operating systems (since Vista).  SMB2 support will eventually
+	  give users better performance, security and features than are
+	  possible with cifs. Note that smb2 mount options are also simpler
+	  (compared to cifs) due to protocol improvements.
+
+	  Unless you are a developer or tester, say N.
+
+config CIFS_NFSD_EXPORT
+	  bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
 	  depends on CIFS && EXPERIMENTAL
 	  help
-	    Enables cifs features under testing. These features are
-	    experimental and currently include DFS support and directory
-	    change notification ie fcntl(F_DNOTIFY), as well as the upcall
-	    mechanism which will be used for Kerberos session negotiation
-	    and uid remapping.  Some of these features also may depend on
-	    setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-	    (which is disabled by default). See the file fs/cifs/README
-	    for more details.  If unsure, say N.
-
+	   Allows the NFS server to export a CIFS-mounted share (nfsd over cifs).
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
 obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
-	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
+	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
index fe1683590828..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -685,22 +685,6 @@ LinuxExtensionsEnabled	If set to one then the client will attempt to
 			support and want to map the uid and gid fields 
 			to values supplied at mount (rather than the 
 			actual values, then set this to zero. (default 1)
-Experimental            When set to 1 used to enable certain experimental
-			features (currently enables multipage writes
-			when signing is enabled, the multipage write
-			performance enhancement was disabled when
-			signing turned on in case buffer was modified
-			just before it was sent, also this flag will
-			be used to use the new experimental directory change 
-			notification code).  When set to 2 enables
-			an additional experimental feature, "raw ntlmssp"
-			session establishment support (which allows
-			specifying "sec=ntlmssp" on mount). The Linux cifs
-			module will use ntlmv2 authentication encapsulated
-			in "raw ntlmssp" (not using SPNEGO) when
-			"sec=ntlmssp" is specified on mount.
-			This support also requires building cifs with
-			the CONFIG_CIFS_EXPERIMENTAL configuration flag.
 
 These experimental features and tracing can be enabled by changing flags in 
 /proc/fs/cifs (after the cifs module has been installed or built into the 
@@ -720,18 +704,6 @@ the start of smb requests and responses can be enabled via:
 
 	echo 1 > /proc/fs/cifs/traceSMB
 
-Two other experimental features are under development. To test these
-requires enabling CONFIG_CIFS_EXPERIMENTAL
-
-	cifsacl support needed to retrieve approximated mode bits based on
-		the contents on the CIFS ACL.
-
-	lease support: cifs will check the oplock state before calling into
-	the vfs to see if we can grant a lease on a file.
-
-	DNOTIFY fcntl: needed for support of directory change 
-			    notification and perhaps later for file leases)
-
 Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
 if the kernel was configured with cifs statistics enabled.  The statistics
 represent the number of successful (ie non-zero return code from the server) 
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e654dfd092c3..53d57a3fe427 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -50,7 +50,7 @@ void cifs_fscache_unregister(void)
  */
 struct cifs_server_key {
 	uint16_t	family;		/* address family */
-	uint16_t	port;		/* IP port */
+	__be16		port;		/* IP port */
 	union {
 		struct in_addr	ipv4_addr;
 		struct in6_addr	ipv6_addr;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 65829d32128c..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
 	cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
 		  smb->Command, smb->Status.CifsError,
 		  smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-	cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
+	cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
 }
 
 
@@ -423,7 +423,6 @@ static const struct file_operations cifs_lookup_cache_proc_fops;
 static const struct file_operations traceSMB_proc_fops;
 static const struct file_operations cifs_multiuser_mount_proc_fops;
 static const struct file_operations cifs_security_flags_proc_fops;
-static const struct file_operations cifs_experimental_proc_fops;
 static const struct file_operations cifs_linux_ext_proc_fops;
 
 void
@@ -441,8 +440,6 @@ cifs_proc_init(void)
 	proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
 	proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
 	proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
-	proc_create("Experimental", 0, proc_fs_cifs,
-		    &cifs_experimental_proc_fops);
 	proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
 		    &cifs_linux_ext_proc_fops);
 	proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -469,7 +466,6 @@ cifs_proc_clean(void)
 	remove_proc_entry("OplockEnabled", proc_fs_cifs);
 	remove_proc_entry("SecurityFlags", proc_fs_cifs);
 	remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
-	remove_proc_entry("Experimental", proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
 	remove_proc_entry("fs/cifs", NULL);
 }
@@ -550,45 +546,6 @@ static const struct file_operations cifs_oplock_proc_fops = {
 	.write		= cifs_oplock_proc_write,
 };
 
-static int cifs_experimental_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%d\n", experimEnabled);
-	return 0;
-}
-
-static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cifs_experimental_proc_show, NULL);
-}
-
-static ssize_t cifs_experimental_proc_write(struct file *file,
-		const char __user *buffer, size_t count, loff_t *ppos)
-{
-	char c;
-	int rc;
-
-	rc = get_user(c, buffer);
-	if (rc)
-		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		experimEnabled = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		experimEnabled = 1;
-	else if (c == '2')
-		experimEnabled = 2;
-
-	return count;
-}
-
-static const struct file_operations cifs_experimental_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= cifs_experimental_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= cifs_experimental_proc_write,
-};
-
 static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
 	unsigned int mnt_cifs_flags;
 	int	prepathlen;
 	char   *prepath; /* relative path under the share to mount to */
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	char   *mountdata; /* mount options received at mount time */
-#endif
+	char   *mountdata; /* options received at mount time or via DFS refs */
 	struct backing_dev_info bdi;
 	struct delayed_work prune_tlinks;
 };
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4dfba8283165..33d221394aca 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -113,7 +113,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
 		   CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
-		   USER_KEY_LEN + strlen(sesInfo->userName) +
+		   USER_KEY_LEN + strlen(sesInfo->user_name) +
 		   PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
 
 	spnego_key = ERR_PTR(-ENOMEM);
@@ -153,7 +153,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
 
 	dp = description + strlen(description);
-	sprintf(dp, ";user=%s", sesInfo->userName);
+	sprintf(dp, ";user=%s", sesInfo->user_name);
 
 	dp = description + strlen(description);
 	sprintf(dp, ";pid=0x%x", current->pid);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fc0fd4fde306..1b2e180b018d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -90,7 +90,7 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 	case UNI_COLON:
 		*target = ':';
 		break;
-	case UNI_ASTERIK:
+	case UNI_ASTERISK:
 		*target = '*';
 		break;
 	case UNI_QUESTION:
@@ -264,40 +264,41 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
  * names are little endian 16 bit Unicode on the wire
  */
 int
-cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+cifsConvertToUCS(__le16 *target, const char *source, int srclen,
 		 const struct nls_table *cp, int mapChars)
 {
 	int i, j, charlen;
-	int len_remaining = maxlen;
 	char src_char;
-	__u16 temp;
+	__le16 dst_char;
+	wchar_t tmp;
 
 	if (!mapChars)
 		return cifs_strtoUCS(target, source, PATH_MAX, cp);
 
-	for (i = 0, j = 0; i < maxlen; j++) {
+	for (i = 0, j = 0; i < srclen; j++) {
 		src_char = source[i];
+		charlen = 1;
 		switch (src_char) {
 		case 0:
-			put_unaligned_le16(0, &target[j]);
+			put_unaligned(0, &target[j]);
 			goto ctoUCS_out;
 		case ':':
-			temp = UNI_COLON;
+			dst_char = cpu_to_le16(UNI_COLON);
 			break;
 		case '*':
-			temp = UNI_ASTERIK;
+			dst_char = cpu_to_le16(UNI_ASTERISK);
 			break;
 		case '?':
-			temp = UNI_QUESTION;
+			dst_char = cpu_to_le16(UNI_QUESTION);
 			break;
 		case '<':
-			temp = UNI_LESSTHAN;
+			dst_char = cpu_to_le16(UNI_LESSTHAN);
 			break;
 		case '>':
-			temp = UNI_GRTRTHAN;
+			dst_char = cpu_to_le16(UNI_GRTRTHAN);
 			break;
 		case '|':
-			temp = UNI_PIPE;
+			dst_char = cpu_to_le16(UNI_PIPE);
 			break;
 		/*
 		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
@@ -305,28 +306,24 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
 		 * as they use backslash as separator.
 		 */
 		default:
-			charlen = cp->char2uni(source+i, len_remaining,
-						&temp);
+			charlen = cp->char2uni(source + i, srclen - i, &tmp);
+			dst_char = cpu_to_le16(tmp);
+
 			/*
 			 * if no match, use question mark, which at least in
 			 * some cases serves as wild card
 			 */
 			if (charlen < 1) {
-				temp = 0x003f;
+				dst_char = cpu_to_le16(0x003f);
 				charlen = 1;
 			}
-			len_remaining -= charlen;
-			/*
-			 * character may take more than one byte in the source
-			 * string, but will take exactly two bytes in the
-			 * target string
-			 */
-			i += charlen;
-			continue;
 		}
-		put_unaligned_le16(temp, &target[j]);
-		i++; /* move to next char in source string */
-		len_remaining--;
+		/*
+		 * character may take more than one byte in the source string,
+		 * but will take exactly two bytes in the target string
+		 */
+		i += charlen;
+		put_unaligned(dst_char, &target[j]);
 	}
 
 ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 7fe6b52df507..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -44,7 +44,7 @@
  * reserved symbols (along with \ and /), otherwise illegal to store
  * in filenames in NTFS
  */
-#define UNI_ASTERIK     (__u16) ('*' + 0xF000)
+#define UNI_ASTERISK    (__u16) ('*' + 0xF000)
 #define UNI_QUESTION    (__u16) ('?' + 0xF000)
 #define UNI_COLON       (__u16) (':' + 0xF000)
 #define UNI_GRTRTHAN    (__u16) ('>' + 0xF000)
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
 char *cifs_strndup_from_ucs(const char *src, const int maxlen,
 			    const bool is_unicode,
 			    const struct nls_table *codepage);
+extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+			const struct nls_table *cp, int mapChars);
+
 #endif
 
 /*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
 
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
 
-
-static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
-	{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
-	{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
-	{{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
-	{{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
-	{{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
-	{{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
-	{{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
-;
-
-
 /* security id for everyone/world system group */
 static const struct cifs_sid sid_everyone = {
 	1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
+const struct cred *root_cred;
 
-int match_sid(struct cifs_sid *ctsid)
+static void
+shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
+			int *nr_del)
 {
-	int i, j;
-	int num_subauth, num_sat, num_saw;
-	struct cifs_sid *cwsid;
+	struct rb_node *node;
+	struct rb_node *tmp;
+	struct cifs_sid_id *psidid;
+
+	node = rb_first(root);
+	while (node) {
+		tmp = node;
+		node = rb_next(tmp);
+		psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
+		if (nr_to_scan == 0 || *nr_del == nr_to_scan)
+			++(*nr_rem);
+		else {
+			if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
+						&& psidid->refcount == 0) {
+				rb_erase(tmp, root);
+				++(*nr_del);
+			} else
+				++(*nr_rem);
+		}
+	}
+}
+
+/*
+ * Run idmap cache shrinker.
+ */
+static int
+cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+{
+	int nr_del = 0;
+	int nr_rem = 0;
+	struct rb_root *root;
+
+	root = &uidtree;
+	spin_lock(&siduidlock);
+	shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+	spin_unlock(&siduidlock);
+
+	root = &gidtree;
+	spin_lock(&sidgidlock);
+	shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+	spin_unlock(&sidgidlock);
+
+	return nr_rem;
+}
+
+static struct shrinker cifs_shrinker = {
+	.shrink = cifs_idmap_shrinker,
+	.seeks = DEFAULT_SEEKS,
+};
+
+static int
+cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+{
+	char *payload;
+
+	payload = kmalloc(datalen, GFP_KERNEL);
+	if (!payload)
+		return -ENOMEM;
+
+	memcpy(payload, data, datalen);
+	key->payload.data = payload;
+	return 0;
+}
+
+static inline void
+cifs_idmap_key_destroy(struct key *key)
+{
+	kfree(key->payload.data);
+}
 
-	if (!ctsid)
-		return -1;
+struct key_type cifs_idmap_key_type = {
+	.name        = "cifs.idmap",
+	.instantiate = cifs_idmap_key_instantiate,
+	.destroy     = cifs_idmap_key_destroy,
+	.describe    = user_describe,
+	.match       = user_match,
+};
+
+static void
+sid_to_str(struct cifs_sid *sidptr, char *sidstr)
+{
+	int i;
+	unsigned long saval;
+	char *strptr;
 
-	for (i = 0; i < NUM_WK_SIDS; ++i) {
-		cwsid = &(wksidarr[i].cifssid);
+	strptr = sidstr;
 
-		/* compare the revision */
-		if (ctsid->revision != cwsid->revision)
-			continue;
+	sprintf(strptr, "%s", "S");
+	strptr = sidstr + strlen(sidstr);
 
-		/* compare all of the six auth values */
-		for (j = 0; j < 6; ++j) {
-			if (ctsid->authority[j] != cwsid->authority[j])
-				break;
+	sprintf(strptr, "-%d", sidptr->revision);
+	strptr = sidstr + strlen(sidstr);
+
+	for (i = 0; i < 6; ++i) {
+		if (sidptr->authority[i]) {
+			sprintf(strptr, "-%d", sidptr->authority[i]);
+			strptr = sidstr + strlen(sidstr);
 		}
-		if (j < 6)
-			continue; /* all of the auth values did not match */
-
-		/* compare all of the subauth values if any */
-		num_sat = ctsid->num_subauth;
-		num_saw = cwsid->num_subauth;
-		num_subauth = num_sat < num_saw ? num_sat : num_saw;
-		if (num_subauth) {
-			for (j = 0; j < num_subauth; ++j) {
-				if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
-					break;
-			}
-			if (j < num_subauth)
-				continue; /* all sub_auth values do not match */
+	}
+
+	for (i = 0; i < sidptr->num_subauth; ++i) {
+		saval = le32_to_cpu(sidptr->sub_auth[i]);
+		sprintf(strptr, "-%lu", saval); /* saval is unsigned long */
+		strptr = sidstr + strlen(sidstr);
+	}
+}
+
+static void
+id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
+		struct cifs_sid_id **psidid, char *typestr)
+{
+	int rc;
+	char *strptr;
+	struct rb_node *node = root->rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node **linkto = &(root->rb_node);
+	struct cifs_sid_id *lsidid;
+
+	while (node) {
+		lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+		parent = node;
+		rc = compare_sids(sidptr, &((lsidid)->sid));
+		if (rc > 0) {
+			linkto = &(node->rb_left);
+			node = node->rb_left;
+		} else if (rc < 0) {
+			linkto = &(node->rb_right);
+			node = node->rb_right;
+		}
+	}
+
+	memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+	(*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
+	(*psidid)->refcount = 0;
+
+	sprintf((*psidid)->sidstr, "%s", typestr);
+	strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
+	sid_to_str(&(*psidid)->sid, strptr);
+
+	clear_bit(SID_ID_PENDING, &(*psidid)->state);
+	clear_bit(SID_ID_MAPPED, &(*psidid)->state);
+
+	rb_link_node(&(*psidid)->rbnode, parent, linkto);
+	rb_insert_color(&(*psidid)->rbnode, root);
+}
+
+static struct cifs_sid_id *
+id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
+{
+	int rc;
+	struct rb_node *node = root->rb_node;
+	struct cifs_sid_id *lsidid;
+
+	while (node) {
+		lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+		rc = compare_sids(sidptr, &((lsidid)->sid));
+		if (rc > 0) {
+			node = node->rb_left;
+		} else if (rc < 0) {
+			node = node->rb_right;
+		} else /* node found */
+			return lsidid;
+	}
+
+	return NULL;
+}
+
+static int
+sidid_pending_wait(void *unused)
+{
+	schedule();
+	return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+static int
+sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
+		struct cifs_fattr *fattr, uint sidtype)
+{
+	int rc;
+	unsigned long cid;
+	struct key *idkey;
+	const struct cred *saved_cred;
+	struct cifs_sid_id *psidid, *npsidid;
+	struct rb_root *cidtree;
+	spinlock_t *cidlock;
+
+	if (sidtype == SIDOWNER) {
+		cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
+		cidlock = &siduidlock;
+		cidtree = &uidtree;
+	} else if (sidtype == SIDGROUP) {
+		cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
+		cidlock = &sidgidlock;
+		cidtree = &gidtree;
+	} else
+		return -ENOENT;
+
+	spin_lock(cidlock);
+	psidid = id_rb_search(cidtree, psid);
+
+	if (!psidid) { /* node does not exist, allocate one & attempt adding */
+		spin_unlock(cidlock);
+		npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
+		if (!npsidid)
+			return -ENOMEM;
+
+		npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
+		if (!npsidid->sidstr) {
+			kfree(npsidid);
+			return -ENOMEM;
+		}
+
+		spin_lock(cidlock);
+		psidid = id_rb_search(cidtree, psid);
+		if (psidid) { /* node happened to get inserted meanwhile */
+			++psidid->refcount;
+			spin_unlock(cidlock);
+			kfree(npsidid->sidstr);
+			kfree(npsidid);
+		} else {
+			psidid = npsidid;
+			id_rb_insert(cidtree, psid, &psidid,
+					sidtype == SIDOWNER ? "os:" : "gs:");
+			++psidid->refcount;
+			spin_unlock(cidlock);
 		}
+	} else {
+		++psidid->refcount;
+		spin_unlock(cidlock);
+	}
+
+	/*
+	 * If we are here, it is safe to access psidid and its fields
+	 * since a reference was taken earlier while holding the spinlock.
+	 * A reference on the node is put without holding the spinlock
+	 * and it is OK to do so in this case, shrinker will not erase
+	 * this node until all references are put and we do not access
+	 * any fields of the node after a reference is put.
+	 */
+	if (test_bit(SID_ID_MAPPED, &psidid->state)) {
+		cid = psidid->id;
+		psidid->time = jiffies; /* update ts for accessing */
+		goto sid_to_id_out;
+	}
 
-		cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
-		return 0; /* sids compare/match */
+	if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
+		goto sid_to_id_out;
+
+	if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
+		saved_cred = override_creds(root_cred);
+		idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
+		if (IS_ERR(idkey))
+			cFYI(1, "%s: Can't map SID to an id", __func__);
+		else {
+			cid = *(unsigned long *)idkey->payload.value;
+			psidid->id = cid;
+			set_bit(SID_ID_MAPPED, &psidid->state);
+			key_put(idkey);
+			kfree(psidid->sidstr);
+		}
+		revert_creds(saved_cred);
+		psidid->time = jiffies; /* update ts for accessing */
+		clear_bit(SID_ID_PENDING, &psidid->state);
+		wake_up_bit(&psidid->state, SID_ID_PENDING);
+	} else {
+		rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
+				sidid_pending_wait, TASK_INTERRUPTIBLE);
+		if (rc) {
+			cFYI(1, "%s: sidid_pending_wait interrupted %d",
+					__func__, rc);
+			--psidid->refcount; /* decremented without spinlock */
+			return rc;
+		}
+		if (test_bit(SID_ID_MAPPED, &psidid->state))
+			cid = psidid->id;
 	}
 
-	cFYI(1, "No matching sid");
-	return -1;
+sid_to_id_out:
+	--psidid->refcount; /* decremented without spinlock */
+	if (sidtype == SIDOWNER)
+		fattr->cf_uid = cid;
+	else
+		fattr->cf_gid = cid;
+
+	return 0;
+}
+
+int
+init_cifs_idmap(void)
+{
+	struct cred *cred;
+	struct key *keyring;
+	int ret;
+
+	cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
+
+	/* create an override credential set with a special thread keyring in
+	 * which requests are cached
+	 *
+	 * this is used to prevent malicious redirections from being installed
+	 * with add_key().
+	 */
+	cred = prepare_kernel_cred(NULL);
+	if (!cred)
+		return -ENOMEM;
+
+	keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
+			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			    KEY_USR_VIEW | KEY_USR_READ,
+			    KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(keyring)) {
+		ret = PTR_ERR(keyring);
+		goto failed_put_cred;
+	}
+
+	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+	if (ret < 0)
+		goto failed_put_key;
+
+	ret = register_key_type(&cifs_idmap_key_type);
+	if (ret < 0)
+		goto failed_put_key;
+
+	/* instruct request_key() to use this special keyring as a cache for
+	 * the results it looks up */
+	cred->thread_keyring = keyring;
+	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+	root_cred = cred;
+
+	spin_lock_init(&siduidlock);
+	uidtree = RB_ROOT;
+	spin_lock_init(&sidgidlock);
+	gidtree = RB_ROOT;
+
+	register_shrinker(&cifs_shrinker);
+
+	cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
+	return 0;
+
+failed_put_key:
+	key_put(keyring);
+failed_put_cred:
+	put_cred(cred);
+	return ret;
+}
+
+void
+exit_cifs_idmap(void)
+{
+	key_revoke(root_cred->thread_keyring);
+	unregister_key_type(&cifs_idmap_key_type);
+	put_cred(root_cred);
+	unregister_shrinker(&cifs_shrinker);
+	cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
+}
+
+void
+cifs_destroy_idmaptrees(void)
+{
+	struct rb_root *root;
+	struct rb_node *node;
+
+	root = &uidtree;
+	spin_lock(&siduidlock);
+	while ((node = rb_first(root)))
+		rb_erase(node, root);
+	spin_unlock(&siduidlock);
+
+	root = &gidtree;
+	spin_lock(&sidgidlock);
+	while ((node = rb_first(root)))
+		rb_erase(node, root);
+	spin_unlock(&sidgidlock);
 }
 
 /* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 	int num_subauth, num_sat, num_saw;
 
 	if ((!ctsid) || (!cwsid))
-		return 0;
+		return 1;
 
 	/* compare the revision */
-	if (ctsid->revision != cwsid->revision)
-		return 0;
+	if (ctsid->revision != cwsid->revision) {
+		if (ctsid->revision > cwsid->revision)
+			return 1;
+		else
+			return -1;
+	}
 
 	/* compare all of the six auth values */
 	for (i = 0; i < 6; ++i) {
-		if (ctsid->authority[i] != cwsid->authority[i])
-			return 0;
+		if (ctsid->authority[i] != cwsid->authority[i]) {
+			if (ctsid->authority[i] > cwsid->authority[i])
+				return 1;
+			else
+				return -1;
+		}
 	}
 
 	/* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 	num_subauth = num_sat < num_saw ? num_sat : num_saw;
 	if (num_subauth) {
 		for (i = 0; i < num_subauth; ++i) {
-			if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
-				return 0;
+			if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
+				if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
+					return 1;
+				else
+					return -1;
+			}
 		}
 	}
 
-	return 1; /* sids compare/match */
+	return 0; /* sids compare/match */
 }
 
 
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 #ifdef CONFIG_CIFS_DEBUG2
 			dump_ace(ppace[i], end_of_acl);
 #endif
-			if (compare_sids(&(ppace[i]->sid), pownersid))
+			if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
 						     &fattr->cf_mode,
 						     &user_mask);
-			if (compare_sids(&(ppace[i]->sid), pgrpsid))
+			if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
 						     &fattr->cf_mode,
 						     &group_mask);
-			if (compare_sids(&(ppace[i]->sid), &sid_everyone))
+			if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
 						     &fattr->cf_mode,
 						     &other_mask);
-			if (compare_sids(&(ppace[i]->sid), &sid_authusers))
+			if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
 						     &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 
 
 /* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
-			  struct cifs_fattr *fattr)
+static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
+		struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
 {
-	int rc;
+	int rc = 0;
 	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
 	struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
 	char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 		 le32_to_cpu(pntsd->sacloffset), dacloffset);
 /*	cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
 	rc = parse_sid(owner_sid_ptr, end_of_acl);
-	if (rc)
+	if (rc) {
+		cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
+		return rc;
+	}
+	rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
+	if (rc) {
+		cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
 		return rc;
+	}
 
 	rc = parse_sid(group_sid_ptr, end_of_acl);
-	if (rc)
+	if (rc) {
+		cFYI(1, "%s: Error %d parsing Group SID", __func__, rc);
 		return rc;
+	}
+	rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
+	if (rc) {
+		cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
+		return rc;
+	}
 
 	if (dacloffset)
 		parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 	memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
 			sizeof(struct cifs_sid)); */
 
-	return 0;
+	return rc;
 }
 
 
@@ -688,7 +1041,7 @@ out:
 }
 
 /* Set an ACL on the server */
-static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 				struct inode *inode, const char *path)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 		rc = PTR_ERR(pntsd);
 		cERROR(1, "%s: error %d getting sec desc", __func__, rc);
 	} else {
-		rc = parse_sec_desc(pntsd, acllen, fattr);
+		rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
 		kfree(pntsd);
 		if (rc)
 			cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
 #define ACCESS_ALLOWED	0
 #define ACCESS_DENIED	1
 
+#define SIDOWNER 1
+#define SIDGROUP 2
+#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
+
+#define SID_ID_MAPPED 0
+#define SID_ID_PENDING 1
+#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
+#define SID_MAP_RETRY (300 * HZ)   /* wait 5 minutes for next attempt to map */
+
 struct cifs_ntsd {
 	__le16 revision; /* revision level */
 	__le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
 	char sidname[SIDNAMELENGTH];
 } __attribute__((packed));
 
-extern int match_sid(struct cifs_sid *);
+struct cifs_sid_id {
+	unsigned int refcount; /* increment with spinlock, decrement without */
+	unsigned long id;
+	unsigned long time;
+	unsigned long state;
+	char *sidstr;
+	struct rb_node rbnode;
+	struct cifs_sid sid;
+};
+
+#ifdef __KERNEL__
+extern struct key_type cifs_idmap_key_type;
+extern const struct cred *root_cred;
+#endif /* __KERNEL__ */
+
 extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
 
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a51585f9852b..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -30,12 +30,13 @@
 #include <linux/ctype.h>
 #include <linux/random.h>
 
-/* Calculate and return the CIFS signature based on the mac key and SMB PDU */
-/* the 16 byte signature must be allocated by the caller  */
-/* Note we only use the 1st eight bytes */
-/* Note that the smb header signature field on input contains the
-	sequence number before this function is called */
-
+/*
+ * Calculate and return the CIFS signature based on the mac key and SMB PDU.
+ * The 16 byte signature must be allocated by the caller. Note we only use the
+ * 1st eight bytes and that the smb header signature field on input contains
+ * the sequence number before this function is called. Also, this function
+ * should be called with the server->srv_mutex held.
+ */
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
 				struct TCP_Server_Info *server, char *signature)
 {
@@ -59,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
 		server->session_key.response, server->session_key.len);
 
 	crypto_shash_update(&server->secmech.sdescmd5->shash,
-		cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+		cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
 
 	rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 
@@ -209,8 +210,10 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 					cpu_to_le32(expected_sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
+	mutex_lock(&server->srv_mutex);
 	rc = cifs_calculate_signature(cifs_pdu, server,
 		what_we_think_sig_should_be);
+	mutex_unlock(&server->srv_mutex);
 
 	if (rc)
 		return rc;
@@ -265,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
 }
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
+int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 			char *lnm_session_key)
 {
 	int i;
+	int rc;
 	char password_with_pad[CIFS_ENCPWD_SIZE];
 
 	memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -279,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 		memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
 		memcpy(lnm_session_key, password_with_pad,
 			CIFS_ENCPWD_SIZE);
-		return;
+		return 0;
 	}
 
 	/* calculate old style session key */
@@ -296,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 	for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
 		password_with_pad[i] = toupper(password_with_pad[i]);
 
-	SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
+	rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
 
-	/* clear password before we return/free memory */
-	memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+	return rc;
 }
 #endif /* CIFS_WEAK_PW_HASH */
 
@@ -469,15 +472,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
 		return rc;
 	}
 
-	/* convert ses->userName to unicode and uppercase */
-	len = strlen(ses->userName);
+	/* convert ses->user_name to unicode and uppercase */
+	len = strlen(ses->user_name);
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
 	if (user == NULL) {
 		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
 		rc = -ENOMEM;
 		goto calc_exit_2;
 	}
-	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
+	len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
 	UniStrupr(user);
 
 	crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f2970136d17d..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -53,7 +53,6 @@ int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
 unsigned int oplockEnabled = 1;
-unsigned int experimEnabled = 0;
 unsigned int linuxExtEnabled = 1;
 unsigned int lookupCacheEnabled = 1;
 unsigned int multiuser_mount = 0;
@@ -127,30 +126,24 @@ cifs_read_super(struct super_block *sb, void *data,
 		kfree(cifs_sb);
 		return rc;
 	}
+	cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	/* copy mount params to sb for use in submounts */
-	/* BB: should we move this after the mount so we
-	 * do not have to do the copy on failed mounts?
-	 * BB: May be it is better to do simple copy before
-	 * complex operation (mount), and in case of fail
-	 * just exit instead of doing mount and attempting
-	 * undo it if this copy fails?*/
+	/*
+	 * Copy mount params to sb for use in submounts. Better to do
+	 * the copy here and deal with the error before cleanup gets
+	 * complicated post-mount.
+	 */
 	if (data) {
-		int len = strlen(data);
-		cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
+		cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
 		if (cifs_sb->mountdata == NULL) {
 			bdi_destroy(&cifs_sb->bdi);
 			kfree(sb->s_fs_info);
 			sb->s_fs_info = NULL;
 			return -ENOMEM;
 		}
-		strncpy(cifs_sb->mountdata, data, len + 1);
-		cifs_sb->mountdata[len] = '\0';
 	}
-#endif
 
-	rc = cifs_mount(sb, cifs_sb, data, devname);
+	rc = cifs_mount(sb, cifs_sb, devname);
 
 	if (rc) {
 		if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
 	sb->s_bdi = &cifs_sb->bdi;
 	sb->s_blocksize = CIFS_MAX_MSGSIZE;
 	sb->s_blocksize_bits = 14;	/* default 2**14 = CIFS_MAX_MSGSIZE */
-	inode = cifs_root_iget(sb, ROOT_I);
+	inode = cifs_root_iget(sb);
 
 	if (IS_ERR(inode)) {
 		rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
 	else
 		sb->s_d_op = &cifs_dentry_ops;
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 		cFYI(1, "export ops supported");
 		sb->s_export_op = &cifs_export_ops;
 	}
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
 
 	return 0;
 
@@ -202,12 +195,10 @@ out_no_root:
 
 out_mount_failed:
 	if (cifs_sb) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
 		if (cifs_sb->mountdata) {
 			kfree(cifs_sb->mountdata);
 			cifs_sb->mountdata = NULL;
 		}
-#endif
 		unload_nls(cifs_sb->local_nls);
 		bdi_destroy(&cifs_sb->bdi);
 		kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
 	rc = cifs_umount(sb, cifs_sb);
 	if (rc)
 		cERROR(1, "cifs_umount failed with return code %d", rc);
-#ifdef CONFIG_CIFS_DFS_UPCALL
 	if (cifs_sb->mountdata) {
 		kfree(cifs_sb->mountdata);
 		cifs_sb->mountdata = NULL;
 	}
-#endif
 
 	unload_nls(cifs_sb->local_nls);
 	bdi_destroy(&cifs_sb->bdi);
@@ -409,8 +398,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
 		seq_printf(s, ",multiuser");
-	else if (tcon->ses->userName)
-		seq_printf(s, ",username=%s", tcon->ses->userName);
+	else if (tcon->ses->user_name)
+		seq_printf(s, ",username=%s", tcon->ses->user_name);
 
 	if (tcon->ses->domainName)
 		seq_printf(s, ",domain=%s", tcon->ses->domainName);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
 	if (origin == SEEK_END) {
-		int retval;
-
-		/* some applications poll for the file length in this strange
-		   way so we must seek to end on non-oplocked files by
-		   setting the revalidate time to zero */
-		CIFS_I(file->f_path.dentry->d_inode)->time = 0;
-
-		retval = cifs_revalidate_file(file);
-		if (retval < 0)
-			return (loff_t)retval;
+		int rc;
+		struct inode *inode = file->f_path.dentry->d_inode;
+
+		/*
+		 * We need to be sure that all dirty pages are written and the
+		 * server has the newest file length.
+		 */
+		if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+		    inode->i_mapping->nrpages != 0) {
+			rc = filemap_fdatawait(inode->i_mapping);
+			if (rc) {
+				mapping_set_error(inode->i_mapping, rc);
+				return rc;
+			}
+		}
+		/*
+		 * Some applications poll for the file length in this strange
+		 * way so we must seek to end on non-oplocked files by
+		 * setting the revalidate time to zero.
+		 */
+		CIFS_I(inode)->time = 0;
+
+		rc = cifs_revalidate_file_attr(file);
+		if (rc < 0)
+			return (loff_t)rc;
 	}
 	return generic_file_llseek_unlocked(file, offset, origin);
 }
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
 };
 
 const struct file_operations cifs_file_direct_ops = {
-	/* no aio, no readv -
-	   BB reevaluate whether they can be done with directio, no cache */
-	.read = cifs_user_read,
-	.write = cifs_user_write,
+	/* BB reevaluate whether they can be done with directio, no cache */
+	.read = do_sync_read,
+	.write = do_sync_write,
+	.aio_read = cifs_user_readv,
+	.aio_write = cifs_user_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
-	/* no mmap, no aio, no readv -
-	   BB reevaluate whether they can be done with directio, no cache */
-	.read = cifs_user_read,
-	.write = cifs_user_write,
+	/* BB reevaluate whether they can be done with directio, no cache */
+	.read = do_sync_read,
+	.write = do_sync_write,
+	.aio_read = cifs_user_readv,
+	.aio_write = cifs_user_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
 	int rc = 0;
 	cifs_proc_init();
 	INIT_LIST_HEAD(&cifs_tcp_ses_list);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 	INIT_LIST_HEAD(&GlobalDnotifyReqList);
 	INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
-#endif
+#endif /* was needed for dnotify, and will be needed for inotify once VFS is fixed */
 /*
  *  Initialize Global counters
  */
@@ -1033,22 +1039,33 @@ init_cifs(void)
 	if (rc)
 		goto out_destroy_mids;
 
-	rc = register_filesystem(&cifs_fs_type);
-	if (rc)
-		goto out_destroy_request_bufs;
 #ifdef CONFIG_CIFS_UPCALL
 	rc = register_key_type(&cifs_spnego_key_type);
 	if (rc)
-		goto out_unregister_filesystem;
-#endif
+		goto out_destroy_request_bufs;
+#endif /* CONFIG_CIFS_UPCALL */
+
+#ifdef CONFIG_CIFS_ACL
+	rc = init_cifs_idmap();
+	if (rc)
+		goto out_register_key_type;
+#endif /* CONFIG_CIFS_ACL */
+
+	rc = register_filesystem(&cifs_fs_type);
+	if (rc)
+		goto out_init_cifs_idmap;
 
 	return 0;
 
-#ifdef CONFIG_CIFS_UPCALL
-out_unregister_filesystem:
-	unregister_filesystem(&cifs_fs_type);
+out_init_cifs_idmap:
+#ifdef CONFIG_CIFS_ACL
+	exit_cifs_idmap();
+out_register_key_type:
 #endif
+#ifdef CONFIG_CIFS_UPCALL
+	unregister_key_type(&cifs_spnego_key_type);
 out_destroy_request_bufs:
+#endif
 	cifs_destroy_request_bufs();
 out_destroy_mids:
 	cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	cifs_dfs_release_automount_timer();
 #endif
+#ifdef CONFIG_CIFS_ACL
+	cifs_destroy_idmaptrees();
+	exit_cifs_idmap();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
 	unregister_key_type(&cifs_spnego_key_type);
 #endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
 
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, int,
 		       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
 		       struct dentry *);
+extern int cifs_revalidate_file_attr(struct file *filp);
+extern int cifs_revalidate_dentry_attr(struct dentry *);
 extern int cifs_revalidate_file(struct file *filp);
 extern int cifs_revalidate_dentry(struct dentry *);
-extern void cifs_invalidate_mapping(struct inode *inode);
+extern int cifs_invalidate_mapping(struct inode *inode);
 extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int cifs_setattr(struct dentry *, struct iattr *);
 
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
-			      size_t read_size, loff_t *poffset);
+extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
 				 unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-			       size_t write_size, loff_t *poffset);
+extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+				unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 				  unsigned long nr_segs, loff_t pos);
 extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.71"
+#define CIFS_VERSION   "1.72"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 17afb0fbcaed..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -37,10 +37,9 @@
 
 #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
 #define MAX_SERVER_SIZE 15
-#define MAX_SHARE_SIZE  64	/* used to be 20, this should still be enough */
-#define MAX_USERNAME_SIZE 32	/* 32 is to allow for 15 char names + null
-				   termination then *2 for unicode versions */
-#define MAX_PASSWORD_SIZE 512  /* max for windows seems to be 256 wide chars */
+#define MAX_SHARE_SIZE 80
+#define MAX_USERNAME_SIZE 256	/* reasonable maximum for current servers */
+#define MAX_PASSWORD_SIZE 512	/* max for windows seems to be 256 wide chars */
 
 #define CIFS_MIN_RCV_POOL 4
 
@@ -92,7 +91,8 @@ enum statusEnum {
 	CifsNew = 0,
 	CifsGood,
 	CifsExiting,
-	CifsNeedReconnect
+	CifsNeedReconnect,
+	CifsNeedNegotiate
 };
 
 enum securityEnum {
@@ -274,7 +274,8 @@ struct cifsSesInfo {
 	int capabilities;
 	char serverName[SERVER_NAME_LEN_WITH_NULL * 2];	/* BB make bigger for
 				TCP names - will ipv6 and sctp addresses fit? */
-	char userName[MAX_USERNAME_SIZE + 1];
+	char *user_name;	/* NULL only during init of sess; filled in
+				   once mount options have been parsed */
 	char *domainName;
 	char *password;
 	struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t		cifs_tcp_ses_lock;
  */
 GLOBAL_EXTERN spinlock_t	cifs_file_list_lock;
 
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
 /* DirNotify response queue */
 GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
+#endif /* was needed for dnotify; needed for inotify once VFS is fixed */
 
 /*
  * Global transaction id (XID) information
@@ -817,7 +820,6 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
 				have the uid/password or Kerberos credential
 				or equivalent for current user */
 GLOBAL_EXTERN unsigned int oplockEnabled;
-GLOBAL_EXTERN unsigned int experimEnabled;
 GLOBAL_EXTERN unsigned int lookupCacheEnabled;
 GLOBAL_EXTERN unsigned int global_secflags;	/* if on, session setup sent
 				with more secure ntlmssp2 challenge/resp */
@@ -831,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
 /* reconnect after this many failed echo attempts */
 GLOBAL_EXTERN unsigned short echo_retries;
 
+GLOBAL_EXTERN struct rb_root uidtree;
+GLOBAL_EXTERN struct rb_root gidtree;
+GLOBAL_EXTERN spinlock_t siduidlock;
+GLOBAL_EXTERN spinlock_t sidgidlock;
+
 void cifs_oplock_break(struct work_struct *work);
 void cifs_oplock_break_get(struct cifsFileInfo *cfile);
 void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
 #define GETU32(var)  (*((__u32 *)var))	/* BB check for endian issues */
 
 struct smb_hdr {
-	__u32 smb_buf_length;	/* big endian on wire *//* BB length is only two
-		or three bytes - with one or two byte type preceding it that are
-		zero - we could mask the type byte off just in case BB */
+	__be32 smb_buf_length;	/* BB length is only two (rarely three) bytes,
+		with one or two byte "type" preceding it that will be
+		zero - we could mask the type byte off */
 	__u8 Protocol[4];
 	__u8 Command;
 	union {
@@ -428,43 +428,28 @@ struct smb_hdr {
 	__u8 WordCount;
 } __attribute__((packed));
 
-/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */
-#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \
-			 (2 * (smb_var)->WordCount))
+/* return a void pointer to the ByteCount: header + 2 * WordCount bytes in */
+static inline void *
+BCC(struct smb_hdr *smb)
+{
+	return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
+}
 
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
 #define pByteArea(smb_var) (BCC(smb_var) + 2)
 
-/* get the converted ByteCount for a SMB packet and return it */
-static inline __u16
-get_bcc(struct smb_hdr *hdr)
-{
-	__u16 *bc_ptr = (__u16 *)BCC(hdr);
-
-	return get_unaligned(bc_ptr);
-}
-
-/* get the unconverted ByteCount for a SMB packet and return it */
+/* read the little-endian ByteCount of a SMB packet; return it in host order */
 static inline __u16
-get_bcc_le(struct smb_hdr *hdr)
+get_bcc(struct smb_hdr *hdr)
 {
 	__le16 *bc_ptr = (__le16 *)BCC(hdr);
 
 	return get_unaligned_le16(bc_ptr);
 }
 
-/* set the ByteCount for a SMB packet in host-byte order */
-static inline void
-put_bcc(__u16 count, struct smb_hdr *hdr)
-{
-	__u16 *bc_ptr = (__u16 *)BCC(hdr);
-
-	put_unaligned(count, bc_ptr);
-}
-
 /* set the ByteCount for a SMB packet in little-endian */
 static inline void
-put_bcc_le(__u16 count, struct smb_hdr *hdr)
+put_bcc(__u16 count, struct smb_hdr *hdr)
 {
 	__le16 *bc_ptr = (__le16 *)BCC(hdr);
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do {								\
 	cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d",	\
 	     __func__, curr_xid, (int)rc);			\
 } while (0)
+extern int init_cifs_idmap(void);
+extern void exit_cifs_idmap(void);
+extern void cifs_destroy_idmaptrees(void);
 extern char *build_path_from_dentry(struct dentry *);
 extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
 					struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
-extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
 extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
 					const char *, u32 *);
+extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
+				const char *);
 
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
+extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
 			const char *);
 extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
 extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
 			struct cifsTconInfo *tcon,
 			const unsigned char *searchName, char **syminfo,
 			const struct nls_table *nls_codepage);
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
 extern int CIFSSMBQueryReparseLinkInfo(const int xid,
 			struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
 			char *symlinkinfo, const int buflen, __u16 fid,
 			const struct nls_table *nls_codepage);
-
+#endif /* temporarily unused until cifs_symlink fixed */
 extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
 			const char *fileName, const int disposition,
 			const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName, __u64 *inode_number,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
-extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
-			const struct nls_table *cp, int mapChars);
 
 extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 			const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifsSesInfo *);
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-extern void calc_lanman_hash(const char *password, const char *cryptkey,
+extern int calc_lanman_hash(const char *password, const char *cryptkey,
 				bool encrypt, char *lnm_session_key);
 #endif /* CIFS_WEAK_PW_HASH */
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
+extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+			const int notify_subdirs, const __u16 netfid,
+			__u32 filter, struct file *file, int multishot,
+			const struct nls_table *nls_codepage);
+#endif /* was needed for dnotify; needed for inotify once VFS is fixed */
 extern int CIFSSMBCopy(int xid,
 			struct cifsTconInfo *source_tcon,
 			const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
 			const char *toName, const int flags,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
-extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
-			const int notify_subdirs, const __u16 netfid,
-			__u32 filter, struct file *file, int multishot,
-			const struct nls_table *nls_codepage);
 extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
 			const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
 		struct cifs_sb_info *cifs_sb, int xid);
 extern int mdfour(unsigned char *, unsigned char *, int);
 extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
-extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
-			unsigned char *p24);
-extern void E_P16(unsigned char *p14, unsigned char *p16);
-extern void E_P24(unsigned char *p21, const unsigned char *c8,
+extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 			unsigned char *p24);
 #endif			/* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2644a5d6cc67..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -142,9 +142,9 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
 	 */
 	while (server->tcpStatus == CifsNeedReconnect) {
 		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus == CifsGood), 10 * HZ);
+			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
 
-		/* is TCP session is reestablished now ?*/
+		/* are we still trying to reconnect? */
 		if (server->tcpStatus != CifsNeedReconnect)
 			break;
 
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
 	    get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
 		goto vt2_err;
 
-	/* check that bcc is at least as big as parms + data */
-	/* check that bcc is less than negotiated smb buffer */
 	total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
 	if (total_size >= 512)
 		goto vt2_err;
 
+	/* check that bcc is at least as big as parms + data, and that
+	 * parms + data is less than the negotiated smb buffer
+	 */
 	total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
 	if (total_size > get_bcc(&pSMB->hdr) ||
 	    total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
 	return -EINVAL;
 }
 
+static inline void inc_rfc1001_len(void *pSMB, int count)
+{
+	struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
+
+	be32_add_cpu(&hdr->smb_buf_length, count); /* length is kept __be32 */
+}
+
 int
 CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 {
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		count += strlen(protocols[i].name) + 1;
 		/* null at end of source and target buffers anyway */
 	}
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		server->secType = RawNTLMSSP;
 	else if (secFlags & CIFSSEC_MAY_LANMAN)
 		server->secType = LANMAN;
-/* #ifdef CONFIG_CIFS_EXPERIMENTAL
-	else if (secFlags & CIFSSEC_MAY_PLNTXT)
-		server->secType = ??
-#endif */
 	else {
 		rc = -EOPNOTSUPP;
 		cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 
 	if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
 		(server->capabilities & CAP_EXTENDED_SECURITY)) {
-		count = pSMBr->ByteCount;
+		count = get_bcc(&pSMBr->hdr);
 		if (count < 16) {
 			rc = -EIO;
 			goto neg_err_exit;
@@ -729,12 +733,12 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 		return rc;
 
 	/* set up echo request */
-	smb->hdr.Tid = cpu_to_le16(0xffff);
+	smb->hdr.Tid = 0xffff;
 	smb->hdr.WordCount = 1;
 	put_unaligned_le16(1, &smb->EchoCount);
-	put_bcc_le(1, &smb->hdr);
+	put_bcc(1, &smb->hdr);
 	smb->Data[0] = 'a';
-	smb->hdr.smb_buf_length += 3;
+	inc_rfc1001_len(smb, 3);
 
 	rc = cifs_call_async(server, (struct smb_hdr *)smb,
 				cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
 	pSMB->SearchAttributes =
 	    cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
 	}
 
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
 	}
 
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
 	cFYI(1, "copying inode info");
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-	if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
+	if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
 		rc = -EIO;	/* bad smb */
 		goto psx_create_err;
 	}
@@ -1096,7 +1100,7 @@ PsxCreat:
 		pRetData->Type = cpu_to_le32(-1); /* unknown */
 		cFYI(DBG2, "unknown type");
 	} else {
-		if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
+		if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
 					+ sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, "Open response data too small");
 			pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
 	pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
 	pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
 	count += name_len;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 
 	pSMB->ByteCount = cpu_to_le16(count);
 	/* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
 	    SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
 
 	count += name_len;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 
 	pSMB->ByteCount = cpu_to_le16(count);
 	/* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 	}
 
 	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
 	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
 			 &resp_buf_type, CIFS_LOG_ERROR);
 	cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 
 	pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 
 	if (wct == 14)
 		pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 	pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
 	pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
-	smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */
+	/* header + 1 byte pad */
+	smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
 	if (wct == 14)
-		pSMB->hdr.smb_buf_length += count+1;
+		inc_rfc1001_len(pSMB, count + 1);
 	else /* wct == 12 */
-		pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */
+		inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
 	if (wct == 14)
 		pSMB->ByteCount = cpu_to_le16(count + 1);
 	else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 		/* oplock break */
 		count = 0;
 	}
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 	pSMB->Fid = smb_file_id;
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	if (waitFlag) {
 		rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
 			(struct smb_hdr *) pSMBr, &bytes_returned);
 	} else {
 		iov[0].iov_base = (char *)pSMB;
-		iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+		iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
 		rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
 				&resp_buf_type, timeout);
 		pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 		__u16 data_count;
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
 			rc = -EIO;      /* bad smb */
 			goto plk_err_exit;
 		}
@@ -1884,10 +1889,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 					__constant_cpu_to_le16(CIFS_WRLCK))
 				pLockData->fl_type = F_WRLCK;
 
-			pLockData->fl_start = parm_data->start;
-			pLockData->fl_end = parm_data->start +
-						parm_data->length - 1;
-			pLockData->fl_pid = parm_data->pid;
+			pLockData->fl_start = le64_to_cpu(parm_data->start);
+			pLockData->fl_end = pLockData->fl_start +
+					le64_to_cpu(parm_data->length) - 1;
+			pLockData->fl_pid = le32_to_cpu(parm_data->pid);
 		}
 	}
 
@@ -2012,7 +2017,7 @@ renameRetry:
 	}
 
 	count = 1 /* 1st signature byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
 	pSMB->InformationLevel =
 		cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
 	}
 
 	count = 1 /* 1st signature byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
 	pSMB->DataOffset = cpu_to_le16(offset);
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
 	}
 
 	count = 1 /* string type byte */  + name_len + name_len2;
-	pSMB->hdr.smb_buf_length += count;
+	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
 
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 		/* BB also check enough total bytes returned */
-		if (rc || (pSMBr->ByteCount < 2))
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
 			rc = -EIO;
 		else {
 			bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
+/*
+ *	Recent Windows versions now create symlinks more frequently
+ *	and they use the "reparse point" mechanism below.  We can of course
+ *	do symlinks nicely to Samba and other servers which support the
+ *	CIFS Unix Extensions and we can also do SFU symlinks and "client only"
+ *	"MF" symlinks optionally, but for recent Windows we really need to
+ *	reenable the code below and fix the cifs_symlink callers to handle this.
+ *	In the interim this code has been moved to its own config option so it
+ *	is not compiled in by default until callers are fixed and it is tested.
+ */
 int
 CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 	} else {		/* decode response */
 		__u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
 		__u32 data_count = le32_to_cpu(pSMBr->DataCount);
-		if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
-		/* BB also check enough total bytes returned */
+		if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
+			/* BB also check enough total bytes returned */
 			rc = -EIO;	/* bad smb */
 			goto qreparse_out;
 		}
 		if (data_count && (data_count < 2048)) {
 			char *end_of_smb = 2 /* sizeof byte count */ +
-				pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
+			       get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
 
 			struct reparse_data *reparse_buf =
 						(struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
 
 	return rc;
 }
-#endif /* CIFS_EXPERIMENTAL */
+#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
 
 #ifdef CONFIG_CIFS_POSIX
 
@@ -2814,7 +2829,7 @@ queryAclRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
 		/* decode response */
 
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
 		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
 			rc = -EIO;      /* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
 	pSMB->ParameterCount = cpu_to_le16(params);
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
 	pSMB->Pad = 0;
 	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->t2.ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
 	} else {
 		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
 		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
 			/* If rc should we check for EOPNOSUPP and
 			   disable the srvino flag? or in caller? */
 			rc = -EIO;      /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 	char *end_of_smb;
 	__u32 data_count, data_offset, parm_count, parm_offset;
 	struct smb_com_ntransact_rsp *pSMBr;
+	u16 bcc;
 
 	*pdatalen = 0;
 	*pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 
 	pSMBr = (struct smb_com_ntransact_rsp *)buf;
 
-	/* ByteCount was converted from little endian in SendReceive */
-	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+	bcc = get_bcc(&pSMBr->hdr);
+	end_of_smb = 2 /* sizeof byte count */ + bcc +
 			(char *)&pSMBr->ByteCount;
 
 	data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 			*ppdata, data_count, (data_count + *ppdata),
 			end_of_smb, pSMBr);
 		return -EINVAL;
-	} else if (parm_count + data_count > pSMBr->ByteCount) {
+	} else if (parm_count + data_count > bcc) {
 		cFYI(1, "parm count and data count larger than SMB");
 		return -EINVAL;
 	}
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 	pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
 				     CIFS_ACL_DACL);
 	pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
-	pSMB->hdr.smb_buf_length += 11;
+	inc_rfc1001_len(pSMB, 11);
 	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
 
 	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
 			 0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
 		memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
 			(char *) pntsd,
 			acllen);
-		pSMB->hdr.smb_buf_length += (byte_count + data_count);
-
+		inc_rfc1001_len(pSMB, byte_count + data_count);
 	} else
-		pSMB->hdr.smb_buf_length += byte_count;
+		inc_rfc1001_len(pSMB, byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 		(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
 	}
 	pSMB->BufferFormat = 0x04;
 	name_len++; /* account for buffer type byte */
-	pSMB->hdr.smb_buf_length += (__u16) name_len;
+	inc_rfc1001_len(pSMB, (__u16)name_len);
 	pSMB->ByteCount = cpu_to_le16(name_len);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
 	pSMB->Pad = 0;
 	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
 
 		if (rc) /* BB add auto retry on EOPNOTSUPP? */
 			rc = -EIO;
-		else if (pSMBr->ByteCount < 40)
+		else if (get_bcc(&pSMBr->hdr) < 40)
 			rc = -EIO;	/* bad smb */
 		else if (pFindData) {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
 	else
 		pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
 
 		if (rc) /* BB add auto retry on EOPNOTSUPP? */
 			rc = -EIO;
-		else if (!legacy && (pSMBr->ByteCount < 40))
+		else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
 			rc = -EIO;	/* bad smb */
-		else if (legacy && (pSMBr->ByteCount < 24))
+		else if (legacy && get_bcc(&pSMBr->hdr) < 24)
 			rc = -EIO;  /* 24 or 26 expected but we do not read
 					last field */
 		else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
 	pSMB->Pad = 0;
 	pSMB->Fid = netfid;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
 				   "Unix Extensions can be disabled on mount "
 				   "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+		if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
 				   "Unix Extensions can be disabled on mount "
 				   "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
 
 	/* BB what should we set StorageType to? Does it matter? BB */
 	pSMB->SearchStorageType = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
 	byte_count = params + 1 /* pad */ ;
 	pSMB->TotalParameterCount = cpu_to_le16(params);
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
 	} else {
 		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-		if (rc || (pSMBr->ByteCount < 2))
 		/* BB also check enough total bytes returned */
+		if (rc || get_bcc(&pSMBr->hdr) < 2)
 			/* If rc should we check for EOPNOSUPP and
 			disable the srvino flag? or in caller? */
 			rc = -EIO;      /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
 	pSMB->ParameterCount = cpu_to_le16(params);
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->MaxReferralLevel = cpu_to_le16(3);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 	/* BB Also check if enough total bytes returned? */
-	if (rc || (pSMBr->ByteCount < 17)) {
+	if (rc || get_bcc(&pSMBr->hdr) < 17) {
 		rc = -EIO;      /* bad smb */
 		goto GetDFSRefExit;
 	}
 
 	cFYI(1, "Decoding GetDFSRefer response BCC: %d  Offset %d",
-				pSMBr->ByteCount,
+				get_bcc(&pSMBr->hdr),
 				le16_to_cpu(pSMBr->t2.DataOffset));
 
 	/* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
 	} else {                /* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 18))
+		if (rc || get_bcc(&pSMBr->hdr) < 18)
 			rc = -EIO;      /* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
 			cFYI(1, "qfsinf resp BCC: %d  Offset %d",
-				 pSMBr->ByteCount, data_offset);
+				 get_bcc(&pSMBr->hdr), data_offset);
 
 			response_data = (FILE_SYSTEM_ALLOC_INFO *)
 				(((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 24))
+		if (rc || get_bcc(&pSMBr->hdr) < 24)
 			rc = -EIO;	/* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 13)) {
+		if (rc || get_bcc(&pSMBr->hdr) < 13) {
 			/* BB also check if enough bytes returned */
 			rc = -EIO;	/* bad smb */
 		} else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO)))
+		if (rc || get_bcc(&pSMBr->hdr) <
+			  sizeof(FILE_SYSTEM_DEVICE_INFO))
 			rc = -EIO;	/* bad smb */
 		else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 13)) {
+		if (rc || get_bcc(&pSMBr->hdr) < 13) {
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
 	pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
 	pSMB->ClientUnixCap = cpu_to_le64(cap);
 
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
 	pSMB->Reserved3 = 0;
 	pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
 	pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
 	} else {		/* decode response */
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
-		if (rc || (pSMBr->ByteCount < 13)) {
+		if (rc || get_bcc(&pSMBr->hdr) < 13) {
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
 	pSMB->ParameterCount = cpu_to_le16(params);
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	parm_data->FileSize = cpu_to_le64(size);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
 				cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
 	}
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
 	if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
 	else
 		pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
 	pSMB->Fid = fid;
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	*data_offset = delete_file ? 1 : 0;
 	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
 	else
 		pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
 	}
 	pSMB->attr = cpu_to_le16(dos_attrs);
 	pSMB->BufferFormat = 0x04;
-	pSMB->hdr.smb_buf_length += name_len + 1;
+	inc_rfc1001_len(pSMB, name_len + 1);
 	pSMB->ByteCount = cpu_to_le16(name_len + 1);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
 	pSMB->Fid = fid;
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
 	pSMB->TotalDataCount = pSMB->DataCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 
 	cifs_fill_unix_set_info(data_offset, args);
 
@@ -5418,79 +5434,6 @@ setPermsRetry:
 	return rc;
 }
 
-int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
-		  const int notify_subdirs, const __u16 netfid,
-		  __u32 filter, struct file *pfile, int multishot,
-		  const struct nls_table *nls_codepage)
-{
-	int rc = 0;
-	struct smb_com_transaction_change_notify_req *pSMB = NULL;
-	struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
-	struct dir_notify_req *dnotify_req;
-	int bytes_returned;
-
-	cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
-	rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
-	if (rc)
-		return rc;
-
-	pSMB->TotalParameterCount = 0 ;
-	pSMB->TotalDataCount = 0;
-	pSMB->MaxParameterCount = cpu_to_le32(2);
-	/* BB find exact data count max from sess structure BB */
-	pSMB->MaxDataCount = 0; /* same in little endian or be */
-/* BB VERIFY verify which is correct for above BB */
-	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
-					     MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
-
-	pSMB->MaxSetupCount = 4;
-	pSMB->Reserved = 0;
-	pSMB->ParameterOffset = 0;
-	pSMB->DataCount = 0;
-	pSMB->DataOffset = 0;
-	pSMB->SetupCount = 4; /* single byte does not need le conversion */
-	pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
-	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	if (notify_subdirs)
-		pSMB->WatchTree = 1; /* one byte - no le conversion needed */
-	pSMB->Reserved2 = 0;
-	pSMB->CompletionFilter = cpu_to_le32(filter);
-	pSMB->Fid = netfid; /* file handle always le */
-	pSMB->ByteCount = 0;
-
-	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-			 (struct smb_hdr *)pSMBr, &bytes_returned,
-			 CIFS_ASYNC_OP);
-	if (rc) {
-		cFYI(1, "Error in Notify = %d", rc);
-	} else {
-		/* Add file to outstanding requests */
-		/* BB change to kmem cache alloc */
-		dnotify_req = kmalloc(
-						sizeof(struct dir_notify_req),
-						 GFP_KERNEL);
-		if (dnotify_req) {
-			dnotify_req->Pid = pSMB->hdr.Pid;
-			dnotify_req->PidHigh = pSMB->hdr.PidHigh;
-			dnotify_req->Mid = pSMB->hdr.Mid;
-			dnotify_req->Tid = pSMB->hdr.Tid;
-			dnotify_req->Uid = pSMB->hdr.Uid;
-			dnotify_req->netfid = netfid;
-			dnotify_req->pfile = pfile;
-			dnotify_req->filter = filter;
-			dnotify_req->multishot = multishot;
-			spin_lock(&GlobalMid_Lock);
-			list_add_tail(&dnotify_req->lhead,
-					&GlobalDnotifyReqList);
-			spin_unlock(&GlobalMid_Lock);
-		} else
-			rc = -ENOMEM;
-	}
-	cifs_buf_release(pSMB);
-	return rc;
-}
-
 #ifdef CONFIG_CIFS_XATTR
 /*
  * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
 	pSMB->ParameterCount = pSMB->TotalParameterCount;
 	pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
 	of these trans2 responses */
 
 	rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-	if (rc || (pSMBr->ByteCount < 4)) {
+	if (rc || get_bcc(&pSMBr->hdr) < 4) {
 		rc = -EIO;	/* bad smb */
 		goto QAllEAsOut;
 	}
@@ -5773,7 +5716,7 @@ SetEARetry:
 	pSMB->ParameterCount = cpu_to_le16(params);
 	pSMB->TotalParameterCount = pSMB->ParameterCount;
 	pSMB->Reserved4 = 0;
-	pSMB->hdr.smb_buf_length += byte_count;
+	inc_rfc1001_len(pSMB, byte_count);
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
 
 	return rc;
 }
-
 #endif
+
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
+/*
+ *	CIFSSMBNotify: send an NT_TRANSACT_NOTIFY_CHANGE request for file
+ *	handle netfid and, on success, queue a dir_notify_req describing it
+ *	on GlobalDnotifyReqList.  Returns 0, the smb_init()/SendReceive()
+ *	error, or -ENOMEM.  nls_codepage is not referenced by the body.
+ *	Background: years ago the kernel added a "dnotify" function for the
+ *	Samba server, so that network clients (such as Windows) could update
+ *	directory listings automatically when one user adds files while
+ *	another user has the same directory open on their desktop.  The
+ *	Linux cifs kernel client hooked into the kernel side of this
+ *	interface for the same reason, but ironically when the VFS moved from
+ *	"dnotify" to "inotify" it became harder to plug in Linux network file
+ *	system clients (multiple users updating one directory, exactly what
+ *	network file systems allow, is the most obvious notify use case),
+ *	although the server (Samba) could still use it.  Until inotify is
+ *	fixed in the VFS to ease plugging in network fs clients, the worker
+ *	function below stays ifdeffed out.  If inotify proves permanently
+ *	incompatible with them, we could instead expose this config flag
+ *	by adding a future cifs (and smb2) notify ioctl.
+ */
+int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+		  const int notify_subdirs, const __u16 netfid,
+		  __u32 filter, struct file *pfile, int multishot,
+		  const struct nls_table *nls_codepage)
+{
+	int rc = 0;
+	struct smb_com_transaction_change_notify_req *pSMB = NULL;
+	struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
+	struct dir_notify_req *dnotify_req;
+	int bytes_returned;
+
+	cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
+	rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
+		      (void **) &pSMBr);
+	if (rc)
+		return rc;
+
+	pSMB->TotalParameterCount = 0 ;
+	pSMB->TotalDataCount = 0;
+	pSMB->MaxParameterCount = cpu_to_le32(2);
+	/* BB find exact data count max from sess structure BB */
+	pSMB->MaxDataCount = 0; /* 0 is endian-neutral; replaced by the assignment below */
+/* BB VERIFY which of the two MaxDataCount settings is correct BB */
+	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+					     MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+
+	pSMB->MaxSetupCount = 4;
+	pSMB->Reserved = 0;
+	pSMB->ParameterOffset = 0;
+	pSMB->DataCount = 0;
+	pSMB->DataOffset = 0;
+	pSMB->SetupCount = 4; /* single byte does not need le conversion */
+	pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
+	pSMB->ParameterCount = pSMB->TotalParameterCount;
+	if (notify_subdirs)
+		pSMB->WatchTree = 1; /* one byte - no le conversion needed */
+	pSMB->Reserved2 = 0;
+	pSMB->CompletionFilter = cpu_to_le32(filter);
+	pSMB->Fid = netfid; /* file handle always le */
+	pSMB->ByteCount = 0;
+
+	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+			 (struct smb_hdr *)pSMBr, &bytes_returned,
+			 CIFS_ASYNC_OP);
+	if (rc) {
+		cFYI(1, "Error in Notify = %d", rc);
+	} else {
+		/* Add file to outstanding requests (GlobalDnotifyReqList) */
+		/* BB change to kmem cache alloc */
+		dnotify_req = kmalloc(
+						sizeof(struct dir_notify_req),
+						 GFP_KERNEL);
+		if (dnotify_req) {
+			dnotify_req->Pid = pSMB->hdr.Pid;
+			dnotify_req->PidHigh = pSMB->hdr.PidHigh;
+			dnotify_req->Mid = pSMB->hdr.Mid;
+			dnotify_req->Tid = pSMB->hdr.Tid;
+			dnotify_req->Uid = pSMB->hdr.Uid;
+			dnotify_req->netfid = netfid;
+			dnotify_req->pfile = pfile;
+			dnotify_req->filter = filter;
+			dnotify_req->multishot = multishot;
+			spin_lock(&GlobalMid_Lock);
+			list_add_tail(&dnotify_req->lhead,
+					&GlobalDnotifyReqList);
+			spin_unlock(&GlobalMid_Lock);
+		} else
+			rc = -ENOMEM;
+	}
+	cifs_buf_release(pSMB); /* reached on both the error and success paths */
+	return rc;
+}
+#endif /* was needed for dnotify, and will be needed for inotify once the VFS is fixed */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6e2b2addfc78..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
 	bool fsc:1;	/* enable fscache */
 	bool mfsymlinks:1; /* use Minshall+French Symlinks */
 	bool multiuser:1;
+	bool use_smb2:1; /* force smb2 use on mount instead of cifs */
 	unsigned int rsize;
 	unsigned int wsize;
 	bool sockopt_tcp_nodelay:1;
@@ -199,8 +200,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	}
 	spin_unlock(&GlobalMid_Lock);
 
-	while ((server->tcpStatus != CifsExiting) &&
-	       (server->tcpStatus != CifsGood)) {
+	while (server->tcpStatus == CifsNeedReconnect) {
 		try_to_freeze();
 
 		/* we should try only the port we connected to before */
@@ -212,7 +212,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 			atomic_inc(&tcpSesReconnectCount);
 			spin_lock(&GlobalMid_Lock);
 			if (server->tcpStatus != CifsExiting)
-				server->tcpStatus = CifsGood;
+				server->tcpStatus = CifsNeedNegotiate;
 			spin_unlock(&GlobalMid_Lock);
 		}
 	}
@@ -248,24 +248,24 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
 	total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
 	data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
 
-	remaining = total_data_size - data_in_this_rsp;
-
-	if (remaining == 0)
+	if (total_data_size == data_in_this_rsp)
 		return 0;
-	else if (remaining < 0) {
+	else if (total_data_size < data_in_this_rsp) {
 		cFYI(1, "total data %d smaller than data in frame %d",
 			total_data_size, data_in_this_rsp);
 		return -EINVAL;
-	} else {
-		cFYI(1, "missing %d bytes from transact2, check next response",
-			remaining);
-		if (total_data_size > maxBufSize) {
-			cERROR(1, "TotalDataSize %d is over maximum buffer %d",
-				total_data_size, maxBufSize);
-			return -EINVAL;
-		}
-		return remaining;
 	}
+
+	remaining = total_data_size - data_in_this_rsp;
+
+	cFYI(1, "missing %d bytes from transact2, check next response",
+		remaining);
+	if (total_data_size > maxBufSize) {
+		cERROR(1, "TotalDataSize %d is over maximum buffer %d",
+			total_data_size, maxBufSize);
+		return -EINVAL;
+	}
+	return remaining;
 }
 
 static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
@@ -275,7 +275,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
 	char *data_area_of_target;
 	char *data_area_of_buf2;
 	int remaining;
-	__u16 byte_count, total_data_size, total_in_buf, total_in_buf2;
+	unsigned int byte_count, total_in_buf;
+	__u16 total_data_size, total_in_buf2;
 
 	total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
 
@@ -288,7 +289,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
 	remaining = total_data_size - total_in_buf;
 
 	if (remaining < 0)
-		return -EINVAL;
+		return -EPROTO;
 
 	if (remaining == 0) /* nothing to do, ignore */
 		return 0;
@@ -309,19 +310,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
 	data_area_of_target += total_in_buf;
 
 	/* copy second buffer into end of first buffer */
-	memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
 	total_in_buf += total_in_buf2;
+	/* is the result too big for the field? */
+	if (total_in_buf > USHRT_MAX)
+		return -EPROTO;
 	put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
-	byte_count = get_bcc_le(pTargetSMB);
-	byte_count += total_in_buf2;
-	put_bcc_le(byte_count, pTargetSMB);
 
-	byte_count = pTargetSMB->smb_buf_length;
+	/* fix up the BCC */
+	byte_count = get_bcc(pTargetSMB);
 	byte_count += total_in_buf2;
+	/* is the result too big for the field? */
+	if (byte_count > USHRT_MAX)
+		return -EPROTO;
+	put_bcc(byte_count, pTargetSMB);
 
-	/* BB also add check that we are not beyond maximum buffer size */
+	byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
+	byte_count += total_in_buf2;
+	/* don't allow buffer to overflow */
+	if (byte_count > CIFSMaxBufSize)
+		return -ENOBUFS;
+	pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
 
-	pTargetSMB->smb_buf_length = byte_count;
+	memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
 
 	if (remaining == total_in_buf2) {
 		cFYI(1, "found the last secondary response");
@@ -421,7 +431,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 		pdu_length = 4; /* enough to get RFC1001 header */
 
 incomplete_rcv:
-		if (echo_retries > 0 &&
+		if (echo_retries > 0 && server->tcpStatus == CifsGood &&
 		    time_after(jiffies, server->lstrp +
 					(echo_retries * SMB_ECHO_INTERVAL))) {
 			cERROR(1, "Server %s has not responded in %d seconds. "
@@ -486,8 +496,7 @@ incomplete_rcv:
 		/* Note that FC 1001 length is big endian on the wire,
 		but we convert it here so it is always manipulated
 		as host byte order */
-		pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
-		smb_buffer->smb_buf_length = pdu_length;
+		pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
 
 		cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
 
@@ -608,59 +617,63 @@ incomplete_rcv:
 		list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
 			mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
 
-			if ((mid_entry->mid == smb_buffer->Mid) &&
-			    (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
-			    (mid_entry->command == smb_buffer->Command)) {
-				if (length == 0 &&
-				   check2ndT2(smb_buffer, server->maxBuf) > 0) {
-					/* We have a multipart transact2 resp */
-					isMultiRsp = true;
-					if (mid_entry->resp_buf) {
-						/* merge response - fix up 1st*/
-						if (coalesce_t2(smb_buffer,
-							mid_entry->resp_buf)) {
-							mid_entry->multiRsp =
-								 true;
-							break;
-						} else {
-							/* all parts received */
-							mid_entry->multiEnd =
-								 true;
-							goto multi_t2_fnd;
-						}
+			if (mid_entry->mid != smb_buffer->Mid ||
+			    mid_entry->midState != MID_REQUEST_SUBMITTED ||
+			    mid_entry->command != smb_buffer->Command) {
+				mid_entry = NULL;
+				continue;
+			}
+
+			if (length == 0 &&
+			    check2ndT2(smb_buffer, server->maxBuf) > 0) {
+				/* We have a multipart transact2 resp */
+				isMultiRsp = true;
+				if (mid_entry->resp_buf) {
+					/* merge response - fix up 1st*/
+					length = coalesce_t2(smb_buffer,
+							mid_entry->resp_buf);
+					if (length > 0) {
+						length = 0;
+						mid_entry->multiRsp = true;
+						break;
 					} else {
-						if (!isLargeBuf) {
-							cERROR(1, "1st trans2 resp needs bigbuf");
-					/* BB maybe we can fix this up,  switch
-					   to already allocated large buffer? */
-						} else {
-							/* Have first buffer */
-							mid_entry->resp_buf =
-								 smb_buffer;
-							mid_entry->largeBuf =
-								 true;
-							bigbuf = NULL;
-						}
+						/* all parts received or
+						 * packet is malformed
+						 */
+						mid_entry->multiEnd = true;
+						goto multi_t2_fnd;
+					}
+				} else {
+					if (!isLargeBuf) {
+						/*
+						 * FIXME: switch to already
+						 *        allocated largebuf?
+						 */
+						cERROR(1, "1st trans2 resp "
+							  "needs bigbuf");
+					} else {
+						/* Have first buffer */
+						mid_entry->resp_buf =
+							 smb_buffer;
+						mid_entry->largeBuf = true;
+						bigbuf = NULL;
 					}
-					break;
 				}
-				mid_entry->resp_buf = smb_buffer;
-				mid_entry->largeBuf = isLargeBuf;
+				break;
+			}
+			mid_entry->resp_buf = smb_buffer;
+			mid_entry->largeBuf = isLargeBuf;
 multi_t2_fnd:
-				if (length == 0)
-					mid_entry->midState =
-							MID_RESPONSE_RECEIVED;
-				else
-					mid_entry->midState =
-							MID_RESPONSE_MALFORMED;
+			if (length == 0)
+				mid_entry->midState = MID_RESPONSE_RECEIVED;
+			else
+				mid_entry->midState = MID_RESPONSE_MALFORMED;
 #ifdef CONFIG_CIFS_STATS2
-				mid_entry->when_received = jiffies;
+			mid_entry->when_received = jiffies;
 #endif
-				list_del_init(&mid_entry->qhead);
-				mid_entry->callback(mid_entry);
-				break;
-			}
-			mid_entry = NULL;
+			list_del_init(&mid_entry->qhead);
+			mid_entry->callback(mid_entry);
+			break;
 		}
 		spin_unlock(&GlobalMid_Lock);
 
@@ -722,7 +735,7 @@ multi_t2_fnd:
 		sock_release(csocket);
 		server->ssocket = NULL;
 	}
-	/* buffer usuallly freed in free_mid - need to free it here on exit */
+	/* buffer usually freed in free_mid - need to free it here on exit */
 	cifs_buf_release(bigbuf);
 	if (smallbuf) /* no sense logging a debug message if NULL */
 		cifs_small_buf_release(smallbuf);
@@ -805,11 +818,11 @@ extract_hostname(const char *unc)
 }
 
 static int
-cifs_parse_mount_options(char *options, const char *devname,
+cifs_parse_mount_options(const char *mountdata, const char *devname,
 			 struct smb_vol *vol)
 {
-	char *value;
-	char *data;
+	char *value, *data, *end;
+	char *mountdata_copy, *options;
 	unsigned int  temp_len, i, j;
 	char separator[2];
 	short int override_uid = -1;
@@ -849,9 +862,15 @@ cifs_parse_mount_options(char *options, const char *devname,
 
 	vol->actimeo = CIFS_DEF_ACTIMEO;
 
-	if (!options)
-		return 1;
+	if (!mountdata)
+		goto cifs_parse_mount_err;
+
+	mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
+	if (!mountdata_copy)
+		goto cifs_parse_mount_err;
 
+	options = mountdata_copy;
+	end = options + strlen(options);
 	if (strncmp(options, "sep=", 4) == 0) {
 		if (options[4] != 0) {
 			separator[0] = options[4];
@@ -876,16 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
 			if (!value) {
 				printk(KERN_WARNING
 				       "CIFS: invalid or missing username\n");
-				return 1;	/* needs_arg; */
+				goto cifs_parse_mount_err;
 			} else if (!*value) {
 				/* null user, ie anonymous, authentication */
 				vol->nullauth = 1;
 			}
-			if (strnlen(value, 200) < 200) {
-				vol->username = value;
+			if (strnlen(value, MAX_USERNAME_SIZE) <
+						MAX_USERNAME_SIZE) {
+				vol->username = kstrdup(value, GFP_KERNEL);
+				if (!vol->username) {
+					printk(KERN_WARNING "CIFS: no memory "
+							    "for username\n");
+					goto cifs_parse_mount_err;
+				}
 			} else {
 				printk(KERN_WARNING "CIFS: username too long\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (strnicmp(data, "pass", 4) == 0) {
 			if (!value) {
@@ -916,6 +941,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 			the only illegal character in a password is null */
 
 			if ((value[temp_len] == 0) &&
+			    (value + temp_len < end) &&
 			    (value[temp_len+1] == separator[0])) {
 				/* reinsert comma */
 				value[temp_len] = separator[0];
@@ -948,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 				if (vol->password == NULL) {
 					printk(KERN_WARNING "CIFS: no memory "
 							    "for password\n");
-					return 1;
+					goto cifs_parse_mount_err;
 				}
 				for (i = 0, j = 0; i < temp_len; i++, j++) {
 					vol->password[j] = value[i];
@@ -964,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 				if (vol->password == NULL) {
 					printk(KERN_WARNING "CIFS: no memory "
 							    "for password\n");
-					return 1;
+					goto cifs_parse_mount_err;
 				}
 				strcpy(vol->password, value);
 			}
@@ -974,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
 				vol->UNCip = NULL;
 			} else if (strnlen(value, INET6_ADDRSTRLEN) <
 							INET6_ADDRSTRLEN) {
-				vol->UNCip = value;
+				vol->UNCip = kstrdup(value, GFP_KERNEL);
+				if (!vol->UNCip) {
+					printk(KERN_WARNING "CIFS: no memory "
+							    "for UNC IP\n");
+					goto cifs_parse_mount_err;
+				}
 			} else {
 				printk(KERN_WARNING "CIFS: ip address "
 						    "too long\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (strnicmp(data, "sec", 3) == 0) {
 			if (!value || !*value) {
@@ -991,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 				/* vol->secFlg |= CIFSSEC_MUST_SEAL |
 					CIFSSEC_MAY_KRB5; */
 				cERROR(1, "Krb5 cifs privacy not supported");
-				return 1;
+				goto cifs_parse_mount_err;
 			} else if (strnicmp(value, "krb5", 4) == 0) {
 				vol->secFlg |= CIFSSEC_MAY_KRB5;
 			} else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1021,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
 				vol->nullauth = 1;
 			} else {
 				cERROR(1, "bad security option: %s", value);
-				return 1;
+				goto cifs_parse_mount_err;
+			}
+		} else if (strnicmp(data, "vers", 3) == 0) {
+			if (!value || !*value) {
+				cERROR(1, "no protocol version specified"
+					  " after vers= mount option");
+			} else if ((strnicmp(value, "cifs", 4) == 0) ||
+				   (strnicmp(value, "1", 1) == 0)) {
+				/* this is the default */
+				continue;
+			} else if ((strnicmp(value, "smb2", 4) == 0) ||
+				   (strnicmp(value, "2", 1) == 0)) {
+#ifdef CONFIG_CIFS_SMB2
+				vol->use_smb2 = true;
+#else
+				cERROR(1, "smb2 support not enabled");
+#endif /* CONFIG_CIFS_SMB2 */
 			}
 		} else if ((strnicmp(data, "unc", 3) == 0)
 			   || (strnicmp(data, "target", 6) == 0)
@@ -1029,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid path to "
 						    "network resource\n");
-				return 1;	/* needs_arg; */
+				goto cifs_parse_mount_err;
 			}
 			if ((temp_len = strnlen(value, 300)) < 300) {
 				vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
 				if (vol->UNC == NULL)
-					return 1;
+					goto cifs_parse_mount_err;
 				strcpy(vol->UNC, value);
 				if (strncmp(vol->UNC, "//", 2) == 0) {
 					vol->UNC[0] = '\\';
@@ -1043,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
 					printk(KERN_WARNING
 					       "CIFS: UNC Path does not begin "
 					       "with // or \\\\ \n");
-					return 1;
+					goto cifs_parse_mount_err;
 				}
 			} else {
 				printk(KERN_WARNING "CIFS: UNC name too long\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if ((strnicmp(data, "domain", 3) == 0)
 			   || (strnicmp(data, "workgroup", 5) == 0)) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid domain name\n");
-				return 1;	/* needs_arg; */
+				goto cifs_parse_mount_err;
 			}
 			/* BB are there cases in which a comma can be valid in
 			a domain name and need special handling? */
 			if (strnlen(value, 256) < 256) {
-				vol->domainname = value;
+				vol->domainname = kstrdup(value, GFP_KERNEL);
+				if (!vol->domainname) {
+					printk(KERN_WARNING "CIFS: no memory "
+							    "for domainname\n");
+					goto cifs_parse_mount_err;
+				}
 				cFYI(1, "Domain name set");
 			} else {
 				printk(KERN_WARNING "CIFS: domain name too "
 						    "long\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (strnicmp(data, "srcaddr", 7) == 0) {
 			vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1071,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: srcaddr value"
 				       " not specified.\n");
-				return 1;	/* needs_arg; */
+				goto cifs_parse_mount_err;
 			}
 			i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
 						 value, strlen(value));
@@ -1079,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
 				printk(KERN_WARNING "CIFS:  Could not parse"
 				       " srcaddr: %s\n",
 				       value);
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (strnicmp(data, "prefixpath", 10) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING
 					"CIFS: invalid path prefix\n");
-				return 1;       /* needs_argument */
+				goto cifs_parse_mount_err;
 			}
 			if ((temp_len = strnlen(value, 1024)) < 1024) {
 				if (value[0] != '/')
 					temp_len++;  /* missing leading slash */
 				vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
 				if (vol->prepath == NULL)
-					return 1;
+					goto cifs_parse_mount_err;
 				if (value[0] != '/') {
 					vol->prepath[0] = '/';
 					strcpy(vol->prepath+1, value);
@@ -1101,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
 				cFYI(1, "prefix path %s", vol->prepath);
 			} else {
 				printk(KERN_WARNING "CIFS: prefix too long\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (strnicmp(data, "iocharset", 9) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid iocharset "
 						    "specified\n");
-				return 1;	/* needs_arg; */
+				goto cifs_parse_mount_err;
 			}
 			if (strnlen(value, 65) < 65) {
-				if (strnicmp(value, "default", 7))
-					vol->iocharset = value;
+				if (strnicmp(value, "default", 7)) {
+					vol->iocharset = kstrdup(value,
+								 GFP_KERNEL);
+
+					if (!vol->iocharset) {
+						printk(KERN_WARNING "CIFS: no "
+								   "memory for "
+								   "charset\n");
+						goto cifs_parse_mount_err;
+					}
+				}
 				/* if iocharset not set then load_nls_default
 				   is used by caller */
 				cFYI(1, "iocharset set to %s", value);
 			} else {
 				printk(KERN_WARNING "CIFS: iocharset name "
 						    "too long.\n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 		} else if (!strnicmp(data, "uid", 3) && value && *value) {
 			vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1231,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 				if (vol->actimeo > CIFS_MAX_ACTIMEO) {
 					cERROR(1, "CIFS: attribute cache"
 							"timeout too large");
-					return 1;
+					goto cifs_parse_mount_err;
 				}
 			}
 		} else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1375,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 #ifndef CONFIG_CIFS_FSCACHE
 			cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
 				  "kernel config option set");
-			return 1;
+			goto cifs_parse_mount_err;
 #endif
 			vol->fsc = true;
 		} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1390,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
 		if (devname == NULL) {
 			printk(KERN_WARNING "CIFS: Missing UNC name for mount "
 						"target\n");
-			return 1;
+			goto cifs_parse_mount_err;
 		}
 		if ((temp_len = strnlen(devname, 300)) < 300) {
 			vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
 			if (vol->UNC == NULL)
-				return 1;
+				goto cifs_parse_mount_err;
 			strcpy(vol->UNC, devname);
 			if (strncmp(vol->UNC, "//", 2) == 0) {
 				vol->UNC[0] = '\\';
@@ -1403,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
 			} else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
 				printk(KERN_WARNING "CIFS: UNC Path does not "
 						    "begin with // or \\\\ \n");
-				return 1;
+				goto cifs_parse_mount_err;
 			}
 			value = strpbrk(vol->UNC+2, "/\\");
 			if (value)
 				*value = '\\';
 		} else {
 			printk(KERN_WARNING "CIFS: UNC name too long\n");
-			return 1;
+			goto cifs_parse_mount_err;
 		}
 	}
 
 	if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
 		cERROR(1, "Multiuser mounts currently require krb5 "
 			  "authentication!");
-		return 1;
+		goto cifs_parse_mount_err;
 	}
 
 	if (vol->UNCip == NULL)
@@ -1435,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
 		printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
 				   "specified with no gid= option.\n");
 
+	kfree(mountdata_copy);
 	return 0;
+
+cifs_parse_mount_err:
+	kfree(mountdata_copy);
+	return 1;
 }
 
 /** Returns true if srcaddr isn't specified and rhs isn't
@@ -1472,7 +1538,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
 static bool
 match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
 {
-	unsigned short int port, *sport;
+	__be16 port, *sport;
 
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -1765,6 +1831,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		module_put(THIS_MODULE);
 		goto out_err_crypto_release;
 	}
+	tcp_ses->tcpStatus = CifsNeedNegotiate;
 
 	/* thread spawned, put it on the list */
 	spin_lock(&cifs_tcp_ses_lock);
@@ -1808,7 +1875,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
 			break;
 		default:
 			/* anything else takes username/password */
-			if (strncmp(ses->userName, vol->username,
+			if (ses->user_name == NULL)
+				continue;
+			if (strncmp(ses->user_name, vol->username,
 				    MAX_USERNAME_SIZE))
 				continue;
 			if (strlen(vol->username) != 0 &&
@@ -1851,6 +1920,8 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
 	cifs_put_tcp_session(server);
 }
 
+static bool warned_on_ntlm;  /* globals init to false automatically */
+
 static struct cifsSesInfo *
 cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 {
@@ -1906,9 +1977,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	else
 		sprintf(ses->serverName, "%pI4", &addr->sin_addr);
 
-	if (volume_info->username)
-		strncpy(ses->userName, volume_info->username,
-			MAX_USERNAME_SIZE);
+	if (volume_info->username) {
+		ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
+		if (!ses->user_name)
+			goto get_ses_fail;
+	}
 
 	/* volume_info->password freed at unmount */
 	if (volume_info->password) {
@@ -1923,6 +1996,15 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	}
 	ses->cred_uid = volume_info->cred_uid;
 	ses->linux_uid = volume_info->linux_uid;
+
+	/* ntlmv2 is much stronger than ntlm security, and has been broadly
+	supported for many years, time to update default security mechanism */
+	if ((volume_info->secFlg == 0) && warned_on_ntlm == false) {
+		warned_on_ntlm = true;
+		cERROR(1, "default security mechanism requested.  The default "
+			"security mechanism will be upgraded from ntlm to "
+			"ntlmv2 in kernel release 2.6.41");
+	}
 	ses->overrideSecFlg = volume_info->secFlg;
 
 	mutex_lock(&ses->session_mutex);
@@ -2249,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
 		smb_buf = (struct smb_hdr *)ses_init_buf;
 
 		/* sizeof RFC1002_SESSION_REQUEST with no scope */
-		smb_buf->smb_buf_length = 0x81000044;
+		smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
 		rc = smb_send(server, smb_buf, 0x44);
 		kfree(ses_init_buf);
 		/*
@@ -2276,7 +2358,7 @@ static int
 generic_ip_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
-	unsigned short int sport;
+	__be16 sport;
 	int slen, sfamily;
 	struct socket *socket = server->ssocket;
 	struct sockaddr *saddr;
@@ -2361,7 +2443,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
 static int
 ip_connect(struct TCP_Server_Info *server)
 {
-	unsigned short int *sport;
+	__be16 *sport;
 	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
 	struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
 
@@ -2642,6 +2724,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
 			      0 /* not legacy */, cifs_sb->local_nls,
 			      cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (rc == -EOPNOTSUPP || rc == -EINVAL)
+		rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
+				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+				  CIFS_MOUNT_MAP_SPECIAL_CHR);
 	kfree(pfile_info);
 	return rc;
 }
@@ -2655,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
 		return;
 
 	volume_info = *pvolume_info;
+	kfree(volume_info->username);
 	kzfree(volume_info->password);
 	kfree(volume_info->UNC);
+	kfree(volume_info->UNCip);
+	kfree(volume_info->domainname);
+	kfree(volume_info->iocharset);
 	kfree(volume_info->prepath);
 	kfree(volume_info);
 	*pvolume_info = NULL;
@@ -2693,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
 	full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
 	return full_path;
 }
+
+/*
+ * Perform a dfs referral query for a share and (optionally) prefix
+ *
+ * If a referral is found, cifs_sb->mountdata is freed and replaced by a newly
+ * composed option string for the submount, or by NULL if composing it fails.
+ * Otherwise it is left untouched.
+ *
+ * Returns the rc from get_dfs_path, or the error from building the UNC path
+ * or from composing the new options.
+ */
+static int
+expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
+		    struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
+		    int check_prefix)
+{
+	int rc;
+	unsigned int num_referrals = 0;
+	struct dfs_info3_param *referrals = NULL;
+	char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+
+	full_path = build_unc_path_to_root(volume_info, cifs_sb);
+	if (IS_ERR(full_path))
+		return PTR_ERR(full_path);
+
+	/* For DFS paths, skip the first '\' of the UNC */
+	ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
+
+	rc = get_dfs_path(xid, pSesInfo, ref_path, cifs_sb->local_nls,
+			  &num_referrals, &referrals,
+			  cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (!rc && num_referrals > 0) {
+		char *fake_devname = NULL;
+
+		mdata = cifs_compose_mount_options(cifs_sb->mountdata,
+						   full_path + 1, referrals,
+						   &fake_devname);
+
+		free_dfs_info_array(referrals, num_referrals);
+		kfree(fake_devname);
+
+		/* the old options are dropped even if composing failed */
+		kfree(cifs_sb->mountdata);
+
+		if (IS_ERR(mdata)) {
+			rc = PTR_ERR(mdata);
+			mdata = NULL;
+		}
+		cifs_sb->mountdata = mdata;
+	}
+	kfree(full_path);
+	return rc;
+}
 #endif
 
 int
 cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
-		char *mount_data_global, const char *devname)
+		const char *devname)
 {
 	int rc;
 	int xid;
@@ -2706,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	struct cifsTconInfo *tcon;
 	struct TCP_Server_Info *srvTcp;
 	char   *full_path;
-	char *mount_data = mount_data_global;
 	struct tcon_link *tlink;
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	struct dfs_info3_param *referrals = NULL;
-	unsigned int num_referrals = 0;
 	int referral_walks_count = 0;
 try_mount_again:
+	/* cleanup activities if we're chasing a referral */
+	if (referral_walks_count) {
+		if (tcon)
+			cifs_put_tcon(tcon);
+		else if (pSesInfo)
+			cifs_put_smb_ses(pSesInfo);
+
+		cleanup_volume_info(&volume_info);
+		FreeXid(xid);
+	}
 #endif
 	rc = 0;
 	tcon = NULL;
@@ -2729,7 +2881,8 @@ try_mount_again:
 		goto out;
 	}
 
-	if (cifs_parse_mount_options(mount_data, devname, volume_info)) {
+	if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
+				     volume_info)) {
 		rc = -EINVAL;
 		goto out;
 	}
@@ -2825,8 +2978,26 @@ try_mount_again:
 			       (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
 
 remote_path_check:
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	/*
+	 * Perform an unconditional check for whether there are DFS
+	 * referrals for this path without prefix, to provide support
+	 * for DFS referrals from w2k8 servers which don't seem to respond
+	 * with PATH_NOT_COVERED to requests that include the prefix.
+	 * Chase the referral if found, otherwise continue normally.
+	 */
+	if (referral_walks_count == 0) {
+		int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
+						cifs_sb, false);
+		if (!refrc) {
+			referral_walks_count++;
+			goto try_mount_again;
+		}
+	}
+#endif
+
 	/* check if a whole path (including prepath) is not remote */
-	if (!rc && cifs_sb->prepathlen && tcon) {
+	if (!rc && tcon) {
 		/* build_path_to_root works only when we have a valid tcon */
 		full_path = cifs_build_path_to_root(cifs_sb, tcon);
 		if (full_path == NULL) {
@@ -2858,46 +3029,15 @@ remote_path_check:
 		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
 			convert_delimiter(cifs_sb->prepath,
 					CIFS_DIR_SEP(cifs_sb));
-		full_path = build_unc_path_to_root(volume_info, cifs_sb);
-		if (IS_ERR(full_path)) {
-			rc = PTR_ERR(full_path);
-			goto mount_fail_check;
-		}
-
-		cFYI(1, "Getting referral for: %s", full_path);
-		rc = get_dfs_path(xid, pSesInfo , full_path + 1,
-			cifs_sb->local_nls, &num_referrals, &referrals,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (!rc && num_referrals > 0) {
-			char *fake_devname = NULL;
 
-			if (mount_data != mount_data_global)
-				kfree(mount_data);
-
-			mount_data = cifs_compose_mount_options(
-					cifs_sb->mountdata, full_path + 1,
-					referrals, &fake_devname);
-
-			free_dfs_info_array(referrals, num_referrals);
-			kfree(fake_devname);
-			kfree(full_path);
+		rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
+					 true);
 
-			if (IS_ERR(mount_data)) {
-				rc = PTR_ERR(mount_data);
-				mount_data = NULL;
-				goto mount_fail_check;
-			}
-
-			if (tcon)
-				cifs_put_tcon(tcon);
-			else if (pSesInfo)
-				cifs_put_smb_ses(pSesInfo);
-
-			cleanup_volume_info(&volume_info);
+		if (!rc) {
 			referral_walks_count++;
-			FreeXid(xid);
 			goto try_mount_again;
 		}
+		goto mount_fail_check;
 #else /* No DFS support, return error on mount */
 		rc = -EOPNOTSUPP;
 #endif
@@ -2930,8 +3070,6 @@ remote_path_check:
 mount_fail_check:
 	/* on error free sesinfo and tcon struct if needed */
 	if (rc) {
-		if (mount_data != mount_data_global)
-			kfree(mount_data);
 		/* If find_unc succeeded then rc == 0 so we can not end */
 		/* up accidentally freeing someone elses tcon struct */
 		if (tcon)
@@ -3047,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 	bcc_ptr += strlen("?????");
 	bcc_ptr += 1;
 	count = bcc_ptr - &pSMB->Password[0];
-	pSMB->hdr.smb_buf_length += count;
+	pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
+					pSMB->hdr.smb_buf_length) + count);
 	pSMB->ByteCount = cpu_to_le16(count);
 
 	rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3222,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
 	struct cifsSesInfo *ses;
 	struct cifsTconInfo *tcon = NULL;
 	struct smb_vol *vol_info;
-	char username[MAX_USERNAME_SIZE + 1];
+	char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
+			   /* We used to have this as MAX_USERNAME which is   */
+			   /* way too big now (256 instead of 32) */
 
 	vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
 	if (vol_info == NULL) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
 #include "cifs_debug.h"
 #include "cifsfs.h"
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 static struct dentry *cifs_get_parent(struct dentry *dentry)
 {
 	/* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
 	.encode_fs =  */
 };
 
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c27d236738fc..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -575,8 +575,10 @@ reopen_error_exit:
 
 int cifs_close(struct inode *inode, struct file *file)
 {
-	cifsFileInfo_put(file->private_data);
-	file->private_data = NULL;
+	if (file->private_data != NULL) {
+		cifsFileInfo_put(file->private_data);
+		file->private_data = NULL;
+	}
 
 	/* return code from the ->release op is always ignored */
 	return 0;
@@ -855,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 		cifsi->server_eof = end_of_write;
 }
 
-ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-	size_t write_size, loff_t *poffset)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int rc = 0;
-	unsigned int bytes_written = 0;
-	unsigned int total_written;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int xid;
-	struct cifsFileInfo *open_file;
-	struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-
-	/* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_path.dentry->d_name.name); */
-
-	if (file->private_data == NULL)
-		return -EBADF;
-
-	open_file = file->private_data;
-	pTcon = tlink_tcon(open_file->tlink);
-
-	rc = generic_write_checks(file, poffset, &write_size, 0);
-	if (rc)
-		return rc;
-
-	xid = GetXid();
-
-	for (total_written = 0; write_size > total_written;
-	     total_written += bytes_written) {
-		rc = -EAGAIN;
-		while (rc == -EAGAIN) {
-			if (file->private_data == NULL) {
-				/* file has been closed on us */
-				FreeXid(xid);
-			/* if we have gotten here we have written some data
-			   and blocked, and the file has been freed on us while
-			   we blocked so return what we managed to write */
-				return total_written;
-			}
-			if (open_file->invalidHandle) {
-				/* we could deadlock if we called
-				   filemap_fdatawait from here so tell
-				   reopen_file not to flush data to server
-				   now */
-				rc = cifs_reopen_file(open_file, false);
-				if (rc != 0)
-					break;
-			}
-
-			rc = CIFSSMBWrite(xid, pTcon,
-				open_file->netfid,
-				min_t(const int, cifs_sb->wsize,
-				      write_size - total_written),
-				*poffset, &bytes_written,
-				NULL, write_data + total_written, 0);
-		}
-		if (rc || (bytes_written == 0)) {
-			if (total_written)
-				break;
-			else {
-				FreeXid(xid);
-				return rc;
-			}
-		} else {
-			cifs_update_eof(cifsi, *poffset, bytes_written);
-			*poffset += bytes_written;
-		}
-	}
-
-	cifs_stats_bytes_written(pTcon, total_written);
-
-/* Do not update local mtime - server will set its actual value on write
- *	inode->i_ctime = inode->i_mtime =
- * 		current_fs_time(inode->i_sb);*/
-	if (total_written > 0) {
-		spin_lock(&inode->i_lock);
-		if (*poffset > inode->i_size)
-			i_size_write(inode, *poffset);
-		spin_unlock(&inode->i_lock);
-	}
-	mark_inode_dirty_sync(inode);
-
-	FreeXid(xid);
-	return total_written;
-}
-
 static ssize_t cifs_write(struct cifsFileInfo *open_file,
 			  const char *write_data, size_t write_size,
 			  loff_t *poffset)
@@ -970,6 +883,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
 	     total_written += bytes_written) {
 		rc = -EAGAIN;
 		while (rc == -EAGAIN) {
+			struct kvec iov[2];
+			unsigned int len;
+
 			if (open_file->invalidHandle) {
 				/* we could deadlock if we called
 				   filemap_fdatawait from here so tell
@@ -979,31 +895,14 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
 				if (rc != 0)
 					break;
 			}
-			if (experimEnabled || (pTcon->ses->server &&
-				((pTcon->ses->server->secMode &
-				(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-				== 0))) {
-				struct kvec iov[2];
-				unsigned int len;
-
-				len = min((size_t)cifs_sb->wsize,
-					  write_size - total_written);
-				/* iov[0] is reserved for smb header */
-				iov[1].iov_base = (char *)write_data +
-						  total_written;
-				iov[1].iov_len = len;
-				rc = CIFSSMBWrite2(xid, pTcon,
-						open_file->netfid, len,
-						*poffset, &bytes_written,
-						iov, 1, 0);
-			} else
-				rc = CIFSSMBWrite(xid, pTcon,
-					 open_file->netfid,
-					 min_t(const int, cifs_sb->wsize,
-					       write_size - total_written),
-					 *poffset, &bytes_written,
-					 write_data + total_written,
-					 NULL, 0);
+
+			len = min((size_t)cifs_sb->wsize,
+				  write_size - total_written);
+			/* iov[0] is reserved for smb header */
+			iov[1].iov_base = (char *)write_data + total_written;
+			iov[1].iov_len = len;
+			rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
+					   *poffset, &bytes_written, iov, 1, 0);
 		}
 		if (rc || (bytes_written == 0)) {
 			if (total_written)
@@ -1240,12 +1139,6 @@ static int cifs_writepages(struct address_space *mapping,
 	}
 
 	tcon = tlink_tcon(open_file->tlink);
-	if (!experimEnabled && tcon->ses->server->secMode &
-			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
-		cifsFileInfo_put(open_file);
-		kfree(iov);
-		return generic_writepages(mapping, wbc);
-	}
 	cifsFileInfo_put(open_file);
 
 	xid = GetXid();
@@ -1438,9 +1331,10 @@ retry_write:
 	return rc;
 }
 
-static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
-	int rc = -EFAULT;
+	int rc;
 	int xid;
 
 	xid = GetXid();
@@ -1460,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
 	 * to fail to update with the state of the page correctly.
 	 */
 	set_page_writeback(page);
+retry_write:
 	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
-	SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
-	unlock_page(page);
+	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
+		goto retry_write;
+	else if (rc == -EAGAIN)
+		redirty_page_for_writepage(wbc, page);
+	else if (rc != 0)
+		SetPageError(page);
+	else
+		SetPageUptodate(page);
 	end_page_writeback(page);
 	page_cache_release(page);
 	FreeXid(xid);
 	return rc;
 }
 
+static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int rc = cifs_writepage_locked(page, wbc);
+	unlock_page(page);
+	return rc;
+}
+
 static int cifs_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
@@ -1537,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
 	cFYI(1, "Sync file - name: %s datasync: 0x%x",
 		file->f_path.dentry->d_name.name, datasync);
 
-	if (!CIFS_I(inode)->clientCanCacheRead)
-		cifs_invalidate_mapping(inode);
+	if (!CIFS_I(inode)->clientCanCacheRead) {
+		rc = cifs_invalidate_mapping(inode);
+		if (rc) {
+			cFYI(1, "rc: %d during invalidate phase", rc);
+			rc = 0; /* don't care about it in fsync */
+		}
+	}
 
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1744,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 	return total_written;
 }
 
-static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
 	ssize_t written;
@@ -1867,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 	return total_read;
 }
 
-ssize_t cifs_user_read(struct file *file, char __user *read_data,
-		       size_t read_size, loff_t *poffset)
-{
-	struct iovec iov;
-	iov.iov_base = read_data;
-	iov.iov_len = read_size;
-
-	return cifs_iovec_read(file, &iov, 1, poffset);
-}
-
-static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
 	ssize_t read;
@@ -1980,6 +1883,24 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	return total_read;
 }
 
+/*
+ * If the page is mmap'ed into a process' page tables, then we need to make
+ * sure that it doesn't change while being written back.
+ */
+static int
+cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *page = vmf->page;
+
+	lock_page(page);
+	return VM_FAULT_LOCKED;
+}
+
+static const struct vm_operations_struct cifs_file_vm_ops = {
+	.fault = filemap_fault,
+	.page_mkwrite = cifs_page_mkwrite,
+};
+
 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int rc, xid;
@@ -1987,10 +1908,15 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 
 	xid = GetXid();
 
-	if (!CIFS_I(inode)->clientCanCacheRead)
-		cifs_invalidate_mapping(inode);
+	/* every exit goes through FreeXid() below so the xid is not leaked */
+	rc = 0;
+	if (!CIFS_I(inode)->clientCanCacheRead)
+		rc = cifs_invalidate_mapping(inode);
 
-	rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
 	FreeXid(xid);
 	return rc;
 }
@@ -2007,6 +1933,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 		return rc;
 	}
 	rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
 	FreeXid(xid);
 	return rc;
 }
@@ -2411,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
 		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
 }
 
+static int cifs_launder_page(struct page *page)
+{
+	int rc = 0;
+	loff_t range_start = page_offset(page);
+	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+
+	cFYI(1, "Launder page: %p", page);
+
+	if (clear_page_dirty_for_io(page))
+		rc = cifs_writepage_locked(page, &wbc);
+
+	cifs_fscache_invalidate_page(page, page->mapping->host);
+	return rc;
+}
+
 void cifs_oplock_break(struct work_struct *work)
 {
 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2482,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = cifs_release_page,
 	.invalidatepage = cifs_invalidate_page,
-	/* .direct_IO = */
+	.launder_page = cifs_launder_page,
 };
 
 /*
@@ -2499,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = cifs_release_page,
 	.invalidatepage = cifs_invalidate_page,
-	/* .direct_IO = */
+	.launder_page = cifs_launder_page,
 };
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
 }
 
 /* gets root inode */
-struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb)
 {
 	int xid;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
 /*
  * Zap the cache. Called when invalid_mapping flag is set.
  */
-void
+int
 cifs_invalidate_mapping(struct inode *inode)
 {
-	int rc;
+	int rc = 0;
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 
 	cifs_i->invalid_mapping = false;
 
-	/* write back any cached data */
 	if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
-		rc = filemap_write_and_wait(inode->i_mapping);
-		mapping_set_error(inode->i_mapping, rc);
+		rc = invalidate_inode_pages2(inode->i_mapping);
+		if (rc) {
+			cERROR(1, "%s: could not invalidate inode %p", __func__,
+			       inode);
+			cifs_i->invalid_mapping = true;
+		}
 	}
-	invalidate_remote_inode(inode);
+
 	cifs_fscache_reset_inode_cookie(inode);
+	return rc;
 }
 
-int cifs_revalidate_file(struct file *filp)
+int cifs_revalidate_file_attr(struct file *filp)
 {
 	int rc = 0;
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
 
 	if (!cifs_inode_needs_reval(inode))
-		goto check_inval;
+		return rc;
 
 	if (tlink_tcon(cfile->tlink)->unix_ext)
 		rc = cifs_get_file_info_unix(filp);
 	else
 		rc = cifs_get_file_info(filp);
 
-check_inval:
-	if (CIFS_I(inode)->invalid_mapping)
-		cifs_invalidate_mapping(inode);
-
 	return rc;
 }
 
-/* revalidate a dentry's inode attributes */
-int cifs_revalidate_dentry(struct dentry *dentry)
+int cifs_revalidate_dentry_attr(struct dentry *dentry)
 {
 	int xid;
 	int rc = 0;
-	char *full_path = NULL;
 	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = dentry->d_sb;
+	char *full_path = NULL;
 
 	if (inode == NULL)
 		return -ENOENT;
 
-	xid = GetXid();
-
 	if (!cifs_inode_needs_reval(inode))
-		goto check_inval;
+		return rc;
+
+	xid = GetXid();
 
 	/* can not safely grab the rename sem here if rename calls revalidate
 	   since that would deadlock */
 	full_path = build_path_from_dentry(dentry);
 	if (full_path == NULL) {
 		rc = -ENOMEM;
-		goto check_inval;
+		goto out;
 	}
 
-	cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
-		 "jiffies %ld", full_path, inode, inode->i_count.counter,
+	cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
+		 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
 		 dentry, dentry->d_time, jiffies);
 
 	if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
 		rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
 					 xid, NULL);
 
-check_inval:
-	if (CIFS_I(inode)->invalid_mapping)
-		cifs_invalidate_mapping(inode);
-
+out:
 	kfree(full_path);
 	FreeXid(xid);
 	return rc;
 }
 
+int cifs_revalidate_file(struct file *filp)
+{
+	int rc;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+
+	rc = cifs_revalidate_file_attr(filp);
+	if (rc)
+		return rc;
+
+	if (CIFS_I(inode)->invalid_mapping)
+		rc = cifs_invalidate_mapping(inode);
+	return rc;
+}
+
+/* revalidate a dentry's inode attributes */
+int cifs_revalidate_dentry(struct dentry *dentry)
+{
+	int rc;
+	struct inode *inode = dentry->d_inode;
+
+	rc = cifs_revalidate_dentry_attr(dentry);
+	if (rc)
+		return rc;
+
+	if (CIFS_I(inode)->invalid_mapping)
+		rc = cifs_invalidate_mapping(inode);
+	return rc;
+}
+
 int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		 struct kstat *stat)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
 	struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
-	int err = cifs_revalidate_dentry(dentry);
-
-	if (!err) {
-		generic_fillattr(dentry->d_inode, stat);
-		stat->blksize = CIFS_MAX_MSGSIZE;
-		stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
+	struct inode *inode = dentry->d_inode;
+	int rc;
 
-		/*
-		 * If on a multiuser mount without unix extensions, and the
-		 * admin hasn't overridden them, set the ownership to the
-		 * fsuid/fsgid of the current process.
-		 */
-		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
-		    !tcon->unix_ext) {
-			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
-				stat->uid = current_fsuid();
-			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
-				stat->gid = current_fsgid();
+	/*
+	 * We need to be sure that all dirty pages are written and the server
+	 * has actual ctime, mtime and file length.
+	 */
+	if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+	    inode->i_mapping->nrpages != 0) {
+		rc = filemap_fdatawait(inode->i_mapping);
+		if (rc) {
+			mapping_set_error(inode->i_mapping, rc);
+			return rc;
 		}
 	}
-	return err;
+
+	rc = cifs_revalidate_dentry_attr(dentry);
+	if (rc)
+		return rc;
+
+	generic_fillattr(inode, stat);
+	stat->blksize = CIFS_MAX_MSGSIZE;
+	stat->ino = CIFS_I(inode)->uniqueid;
+
+	/*
+	 * If on a multiuser mount without unix extensions, and the admin hasn't
+	 * overridden them, set the ownership to the fsuid/fsgid of the current
+	 * process.
+	 */
+	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
+	    !tcon->unix_ext) {
+		if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+			stat->uid = current_fsuid();
+		if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+			stat->gid = current_fsgid();
+	}
+	return rc;
 }
 
 static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e8804d373404..ce417a9764a3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -239,7 +239,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
 	if (rc != 0)
 		return rc;
 
-	if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) {
+	if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
 		CIFSSMBClose(xid, tcon, netfid);
 		/* it's not a symlink */
 		return -EINVAL;
@@ -316,7 +316,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
 	if (rc != 0)
 		goto out;
 
-	if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) {
+	if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
 		CIFSSMBClose(xid, pTcon, netfid);
 		/* it's not a symlink */
 		goto out;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a930a752a78..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -100,6 +100,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
 		memset(buf_to_free->password, 0, strlen(buf_to_free->password));
 		kfree(buf_to_free->password);
 	}
+	kfree(buf_to_free->user_name);
 	kfree(buf_to_free->domainName);
 	kfree(buf_to_free);
 }
@@ -303,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
 
 	memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
 
-	buffer->smb_buf_length =
+	buffer->smb_buf_length = cpu_to_be32(
 	    (2 * word_count) + sizeof(struct smb_hdr) -
 	    4 /*  RFC 1001 length field does not count */  +
-	    2 /* for bcc field itself */ ;
-	/* Note that this is the only network field that has to be converted
-	   to big endian and it is done just before we send it */
+	    2 /* for bcc field itself */) ;
 
 	buffer->Protocol[0] = 0xFF;
 	buffer->Protocol[1] = 'S';
@@ -423,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
 int
 checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 {
-	__u32 len = smb->smb_buf_length;
+	__u32 len = be32_to_cpu(smb->smb_buf_length);
 	__u32 clc_len;  /* calculated length */
 	cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
 
@@ -463,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 
 	if (check_smb_hdr(smb, mid))
 		return 1;
-	clc_len = smbCalcSize_LE(smb);
+	clc_len = smbCalcSize(smb);
 
 	if (4 + len != length) {
 		cERROR(1, "Length read does not match RFC1001 length %d",
@@ -520,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 			(struct smb_com_transaction_change_notify_rsp *)buf;
 		struct file_notify_information *pnotify;
 		__u32 data_offset = 0;
-		if (pSMBr->ByteCount > sizeof(struct file_notify_information)) {
+		if (get_bcc(buf) > sizeof(struct file_notify_information)) {
 			data_offset = le32_to_cpu(pSMBr->DataOffset);
 
 			pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
 		2 /* size of the bcc field */ + get_bcc(ptr));
 }
 
-unsigned int
-smbCalcSize_LE(struct smb_hdr *ptr)
-{
-	return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
-		2 /* size of the bcc field */ + get_bcc_le(ptr));
-}
-
 /* The following are taken from fs/ntfs/util.c */
 
 #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 16765703131b..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -219,12 +219,12 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 		bcc_ptr++;
 	} */
 	/* copy user */
-	if (ses->userName == NULL) {
+	if (ses->user_name == NULL) {
 		/* null user mount */
 		*bcc_ptr = 0;
 		*(bcc_ptr+1) = 0;
 	} else {
-		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name,
 					  MAX_USERNAME_SIZE, nls_cp);
 	}
 	bcc_ptr += 2 * bytes_ret;
@@ -244,12 +244,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 	/* copy user */
 	/* BB what about null user mounts - check that we do this BB */
 	/* copy user */
-	if (ses->userName == NULL) {
-		/* BB what about null user mounts - check that we do this BB */
-	} else {
-		strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
-	}
-	bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
+	if (ses->user_name != NULL)
+		strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
+	/* else null user mount */
+
+	bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
 	*bcc_ptr = 0;
 	bcc_ptr++; /* account for null termination */
 
@@ -277,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 }
 
 static void
-decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
+decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
 		      const struct nls_table *nls_cp)
 {
 	int len;
@@ -285,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
 
 	cFYI(1, "bleft %d", bleft);
 
-	/*
-	 * Windows servers do not always double null terminate their final
-	 * Unicode string. Check to see if there are an uneven number of bytes
-	 * left. If so, then add an extra NULL pad byte to the end of the
-	 * response.
-	 *
-	 * See section 2.7.2 in "Implementing CIFS" for details
-	 */
-	if (bleft % 2) {
-		data[bleft] = 0;
-		++bleft;
-	}
-
 	kfree(ses->serverOS);
 	ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
 	cFYI(1, "serverOS=%s", ses->serverOS);
@@ -405,8 +391,8 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
 	ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
-	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
-	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
+	tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
+	tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
 	if (tilen) {
 		ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
 		if (!ses->auth_key.response) {
@@ -523,14 +509,14 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		tmp += len;
 	}
 
-	if (ses->userName == NULL) {
+	if (ses->user_name == NULL) {
 		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->UserName.Length = 0;
 		sec_blob->UserName.MaximumLength = 0;
 		tmp += 2;
 	} else {
 		int len;
-		len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
+		len = cifs_strtoUCS((__le16 *)tmp, ses->user_name,
 				    MAX_USERNAME_SIZE, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
 		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -635,7 +621,7 @@ ssetup_ntlmssp_authenticate:
 	and rest of bcc area. This allows us to avoid
 	a large buffer 17K allocation */
 	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = smb_buf->smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
 
 	/* setting this here allows the code at the end of the function
 	   to free the request buffer if there's an error */
@@ -670,7 +656,7 @@ ssetup_ntlmssp_authenticate:
 		 * to use challenge/response method (i.e. Password bit is 1).
 		 */
 
-		calc_lanman_hash(ses->password, ses->server->cryptkey,
+		rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
 				 ses->server->secMode & SECMODE_PW_ENCRYPT ?
 					true : false, lnm_session_key);
 
@@ -873,9 +859,10 @@ ssetup_ntlmssp_authenticate:
 	iov[2].iov_len = (long) bcc_ptr - (long) str_area;
 
 	count = iov[1].iov_len + iov[2].iov_len;
-	smb_buf->smb_buf_length += count;
+	smb_buf->smb_buf_length =
+		cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
 
-	put_bcc_le(count, smb_buf);
+	put_bcc(count, smb_buf);
 
 	rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
 			  CIFS_LOG_ERROR);
@@ -930,7 +917,9 @@ ssetup_ntlmssp_authenticate:
 	}
 
 	/* BB check if Unicode and decode strings */
-	if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+	if (bytes_remaining == 0) {
+		/* no string area to decode, do nothing */
+	} else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
 		/* unicode string area must be word-aligned */
 		if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
 			++bcc_ptr;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
-   Unix SMB/Netbios implementation.
-   Version 1.9.
-
-   a partial implementation of DES designed for use in the
-   SMB authentication protocol
-
-   Copyright (C) Andrew Tridgell 1998
-   Modified by Steve French (sfrench@us.ibm.com) 2002,2004
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
-/* NOTES:
-
-   This code makes no attempt to be fast! In fact, it is a very
-   slow implementation
-
-   This code is NOT a complete DES implementation. It implements only
-   the minimum necessary for SMB authentication, as used by all SMB
-   products (including every copy of Microsoft Windows95 ever sold)
-
-   In particular, it can only do a unchained forward DES pass. This
-   means it is not possible to use this code for encryption/decryption
-   of data, instead it is only useful as a "hash" algorithm.
-
-   There is no entry point into this code that allows normal DES operation.
-
-   I believe this means that this code does not come under ITAR
-   regulations but this is NOT a legal opinion. If you are concerned
-   about the applicability of ITAR regulations to this code then you
-   should confirm it for yourself (and maybe let me know if you come
-   up with a different answer to the one above)
-*/
-#include <linux/slab.h>
-#define uchar unsigned char
-
-static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
-	1, 58, 50, 42, 34, 26, 18,
-	10, 2, 59, 51, 43, 35, 27,
-	19, 11, 3, 60, 52, 44, 36,
-	63, 55, 47, 39, 31, 23, 15,
-	7, 62, 54, 46, 38, 30, 22,
-	14, 6, 61, 53, 45, 37, 29,
-	21, 13, 5, 28, 20, 12, 4
-};
-
-static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
-	3, 28, 15, 6, 21, 10,
-	23, 19, 12, 4, 26, 8,
-	16, 7, 27, 20, 13, 2,
-	41, 52, 31, 37, 47, 55,
-	30, 40, 51, 45, 33, 48,
-	44, 49, 39, 56, 34, 53,
-	46, 42, 50, 36, 29, 32
-};
-
-static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
-	60, 52, 44, 36, 28, 20, 12, 4,
-	62, 54, 46, 38, 30, 22, 14, 6,
-	64, 56, 48, 40, 32, 24, 16, 8,
-	57, 49, 41, 33, 25, 17, 9, 1,
-	59, 51, 43, 35, 27, 19, 11, 3,
-	61, 53, 45, 37, 29, 21, 13, 5,
-	63, 55, 47, 39, 31, 23, 15, 7
-};
-
-static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
-	4, 5, 6, 7, 8, 9,
-	8, 9, 10, 11, 12, 13,
-	12, 13, 14, 15, 16, 17,
-	16, 17, 18, 19, 20, 21,
-	20, 21, 22, 23, 24, 25,
-	24, 25, 26, 27, 28, 29,
-	28, 29, 30, 31, 32, 1
-};
-
-static uchar perm5[32] = { 16, 7, 20, 21,
-	29, 12, 28, 17,
-	1, 15, 23, 26,
-	5, 18, 31, 10,
-	2, 8, 24, 14,
-	32, 27, 3, 9,
-	19, 13, 30, 6,
-	22, 11, 4, 25
-};
-
-static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
-	39, 7, 47, 15, 55, 23, 63, 31,
-	38, 6, 46, 14, 54, 22, 62, 30,
-	37, 5, 45, 13, 53, 21, 61, 29,
-	36, 4, 44, 12, 52, 20, 60, 28,
-	35, 3, 43, 11, 51, 19, 59, 27,
-	34, 2, 42, 10, 50, 18, 58, 26,
-	33, 1, 41, 9, 49, 17, 57, 25
-};
-
-static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
-
-static uchar sbox[8][4][16] = {
-	{{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
-	 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
-	 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
-	 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
-
-	{{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
-	 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
-	 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
-	 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
-
-	{{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
-	 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
-	 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
-	 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
-
-	{{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
-	 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
-	 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
-	 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
-
-	{{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
-	 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
-	 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
-	 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
-
-	{{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
-	 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
-	 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
-	 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
-
-	{{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
-	 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
-	 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
-	 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
-
-	{{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
-	 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
-	 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
-	 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
-};
-
-static void
-permute(char *out, char *in, uchar *p, int n)
-{
-	int i;
-	for (i = 0; i < n; i++)
-		out[i] = in[p[i] - 1];
-}
-
-static void
-lshift(char *d, int count, int n)
-{
-	char out[64];
-	int i;
-	for (i = 0; i < n; i++)
-		out[i] = d[(i + count) % n];
-	for (i = 0; i < n; i++)
-		d[i] = out[i];
-}
-
-static void
-concat(char *out, char *in1, char *in2, int l1, int l2)
-{
-	while (l1--)
-		*out++ = *in1++;
-	while (l2--)
-		*out++ = *in2++;
-}
-
-static void
-xor(char *out, char *in1, char *in2, int n)
-{
-	int i;
-	for (i = 0; i < n; i++)
-		out[i] = in1[i] ^ in2[i];
-}
-
-static void
-dohash(char *out, char *in, char *key, int forw)
-{
-	int i, j, k;
-	char *pk1;
-	char c[28];
-	char d[28];
-	char *cd;
-	char (*ki)[48];
-	char *pd1;
-	char l[32], r[32];
-	char *rl;
-
-	/* Have to reduce stack usage */
-	pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
-	if (pk1 == NULL)
-		return;
-
-	ki = kmalloc(16*48, GFP_KERNEL);
-	if (ki == NULL) {
-		kfree(pk1);
-		return;
-	}
-
-	cd = pk1 + 56;
-	pd1 = cd  + 56;
-	rl = pd1 + 64;
-
-	permute(pk1, key, perm1, 56);
-
-	for (i = 0; i < 28; i++)
-		c[i] = pk1[i];
-	for (i = 0; i < 28; i++)
-		d[i] = pk1[i + 28];
-
-	for (i = 0; i < 16; i++) {
-		lshift(c, sc[i], 28);
-		lshift(d, sc[i], 28);
-
-		concat(cd, c, d, 28, 28);
-		permute(ki[i], cd, perm2, 48);
-	}
-
-	permute(pd1, in, perm3, 64);
-
-	for (j = 0; j < 32; j++) {
-		l[j] = pd1[j];
-		r[j] = pd1[j + 32];
-	}
-
-	for (i = 0; i < 16; i++) {
-		char *er;  /* er[48]  */
-		char *erk; /* erk[48] */
-		char b[8][6];
-		char *cb;  /* cb[32]  */
-		char *pcb; /* pcb[32] */
-		char *r2;  /* r2[32]  */
-
-		er = kmalloc(48+48+32+32+32, GFP_KERNEL);
-		if (er == NULL) {
-			kfree(pk1);
-			kfree(ki);
-			return;
-		}
-		erk = er+48;
-		cb  = erk+48;
-		pcb = cb+32;
-		r2  = pcb+32;
-
-		permute(er, r, perm4, 48);
-
-		xor(erk, er, ki[forw ? i : 15 - i], 48);
-
-		for (j = 0; j < 8; j++)
-			for (k = 0; k < 6; k++)
-				b[j][k] = erk[j * 6 + k];
-
-		for (j = 0; j < 8; j++) {
-			int m, n;
-			m = (b[j][0] << 1) | b[j][5];
-
-			n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
-							       1) | b[j][4];
-
-			for (k = 0; k < 4; k++)
-				b[j][k] =
-				    (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
-		}
-
-		for (j = 0; j < 8; j++)
-			for (k = 0; k < 4; k++)
-				cb[j * 4 + k] = b[j][k];
-		permute(pcb, cb, perm5, 32);
-
-		xor(r2, l, pcb, 32);
-
-		for (j = 0; j < 32; j++)
-			l[j] = r[j];
-
-		for (j = 0; j < 32; j++)
-			r[j] = r2[j];
-
-		kfree(er);
-	}
-
-	concat(rl, r, l, 32, 32);
-
-	permute(out, rl, perm6, 64);
-	kfree(pk1);
-	kfree(ki);
-}
-
-static void
-str_to_key(unsigned char *str, unsigned char *key)
-{
-	int i;
-
-	key[0] = str[0] >> 1;
-	key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
-	key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
-	key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
-	key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
-	key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
-	key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
-	key[7] = str[6] & 0x7F;
-	for (i = 0; i < 8; i++)
-		key[i] = (key[i] << 1);
-}
-
-static void
-smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
-	int forw)
-{
-	int i;
-	char *outb; /* outb[64] */
-	char *inb;  /* inb[64]  */
-	char *keyb; /* keyb[64] */
-	unsigned char key2[8];
-
-	outb = kmalloc(64 * 3, GFP_KERNEL);
-	if (outb == NULL)
-		return;
-
-	inb  = outb + 64;
-	keyb = inb +  64;
-
-	str_to_key(key, key2);
-
-	for (i = 0; i < 64; i++) {
-		inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
-		keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
-		outb[i] = 0;
-	}
-
-	dohash(outb, inb, keyb, forw);
-
-	for (i = 0; i < 8; i++)
-		out[i] = 0;
-
-	for (i = 0; i < 64; i++) {
-		if (outb[i])
-			out[i / 8] |= (1 << (7 - (i % 8)));
-	}
-	kfree(outb);
-}
-
-void
-E_P16(unsigned char *p14, unsigned char *p16)
-{
-	unsigned char sp8[8] =
-	    { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
-	smbhash(p16, sp8, p14, 1);
-	smbhash(p16 + 8, sp8, p14 + 7, 1);
-}
-
-void
-E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
-{
-	smbhash(p24, c8, p21, 1);
-	smbhash(p24 + 8, c8, p21 + 7, 1);
-	smbhash(p24 + 16, c8, p21 + 14, 1);
-}
-
-#if 0 /* currently unused */
-static void
-D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
-	smbhash(out, in, p14, 0);
-	smbhash(out + 8, in + 8, p14 + 7, 0);
-}
-
-static void
-E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
-	smbhash(out, in, p14, 1);
-	smbhash(out + 8, in + 8, p14 + 7, 1);
-}
-/* these routines are currently unneeded, but may be
-	needed later */
-void
-cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
-{
-	unsigned char buf[8];
-
-	smbhash(buf, in, key, 1);
-	smbhash(out, buf, key + 9, 1);
-}
-
-void
-cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
-{
-	unsigned char buf[8];
-	static unsigned char key2[8];
-
-	smbhash(buf, in, key, 1);
-	key2[0] = key[7];
-	smbhash(out, buf, key2, 1);
-}
-
-void
-cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
-{
-	static unsigned char key2[8];
-
-	smbhash(out, in, key, forw);
-	key2[0] = key[7];
-	smbhash(out + 8, in + 8, key2, forw);
-}
-#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
 #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
 #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
 
+/*
+ * Expand the 7 bytes at str into the 8 bytes at key: each key byte holds
+ * seven consecutive input bits in bits 7..1, and bit 0 is left clear.
+ */
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+	key[0] = (str[0] >> 1) << 1;
+	key[1] = (((str[0] & 0x01) << 6) | (str[1] >> 2)) << 1;
+	key[2] = (((str[1] & 0x03) << 5) | (str[2] >> 3)) << 1;
+	key[3] = (((str[2] & 0x07) << 4) | (str[3] >> 4)) << 1;
+	key[4] = (((str[3] & 0x0F) << 3) | (str[4] >> 5)) << 1;
+	key[5] = (((str[4] & 0x1F) << 2) | (str[5] >> 6)) << 1;
+	key[6] = (((str[5] & 0x3F) << 1) | (str[6] >> 7)) << 1;
+	key[7] = (str[6] & 0x7F) << 1;
+}
+
+/*
+ * DES-encrypt ("ecb(des)") the 8 bytes at in into out, keyed by the 7 bytes
+ * at key after str_to_key() expansion. Returns 0 or a crypto API error code.
+ */
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+	int rc;
+	unsigned char key2[8];
+	struct crypto_blkcipher *tfm_des;
+	struct scatterlist sgin, sgout;
+	struct blkcipher_desc desc;
+
+	str_to_key(key, key2);
+
+	tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm_des)) {
+		cERROR(1, "could not allocate des crypto API\n");
+		return PTR_ERR(tfm_des);
+	}
+
+	desc.tfm = tfm_des;
+	crypto_blkcipher_setkey(tfm_des, key2, 8);
+
+	sg_init_one(&sgin, in, 8);
+	sg_init_one(&sgout, out, 8);
+
+	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+	if (rc)
+		cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
+
+	/* per-call cipher handle: release it on success as well as failure */
+	crypto_free_blkcipher(tfm_des);
+	return rc;
+}
+
+/* Fill p16 with DES of "KGS!@#$%" keyed by p14[0..6], then by p14[7..13] */
+static int
+E_P16(unsigned char *p14, unsigned char *p16)
+{
+	unsigned char sp8[8] =
+	    { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
+	int rc = smbhash(p16, sp8, p14);
+
+	if (rc)
+		return rc;
+
+	return smbhash(p16 + 8, sp8, p14 + 7);
+}
+
+/* Encrypt c8 under each 7-byte third of p21, filling p24 in 8-byte steps */
+static int
+E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+	int part, rc = 0;
+
+	for (part = 0; part < 3; part++) {
+		rc = smbhash(p24 + 8 * part, c8, p21 + 7 * part);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 /* produce a md4 message digest from data of length n bytes */
 int
 mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
 	return rc;
 }
 
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
-	      unsigned char p24[24])
-{
-	unsigned char p21[21];
-
-	memset(p21, '\0', 21);
-
-	memcpy(p21, passwd, 16);
-	E_P24(p21, c8, p24);
-}
-
 /*
    This implements the X/Open SMB password encryption
    It takes a password, a 8 byte "crypt key" and puts 24 bytes of
    encrypted password into p24 */
 /* Note that password must be uppercased and null terminated */
-void
+int
 SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
 {
-	unsigned char p14[15], p21[21];
+	int rc;
+	unsigned char p14[14], p16[16], p21[21];
 
-	memset(p21, '\0', 21);
 	memset(p14, '\0', 14);
-	strncpy((char *) p14, (char *) passwd, 14);
+	memset(p16, '\0', 16);
+	memset(p21, '\0', 21);
 
-/*	strupper((char *)p14); *//* BB at least uppercase the easy range */
-	E_P16(p14, p21);
+	memcpy(p14, passwd, 14);
+	rc = E_P16(p14, p16);
+	if (rc)
+		return rc;
 
-	SMBOWFencrypt(p21, c8, p24);
+	memcpy(p21, p16, 16);
+	rc = E_P24(p21, c8, p24);
 
-	memset(p14, 0, 15);
-	memset(p21, 0, 21);
+	return rc;
 }
 
 /* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
 SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
 {
 	int rc;
-	unsigned char p21[21];
+	unsigned char p16[16], p21[21];
 
+	memset(p16, '\0', 16);
 	memset(p21, '\0', 21);
 
-	rc = E_md4hash(passwd, p21);
+	rc = E_md4hash(passwd, p16);
 	if (rc) {
 		cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
 		return rc;
 	}
-	SMBOWFencrypt(p21, c8, p24);
+	memcpy(p21, p16, 16);
+	rc = E_P24(p21, c8, p24);
 	return rc;
 }
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	unsigned int len = iov[0].iov_len;
 	unsigned int total_len;
 	int first_vec = 0;
-	unsigned int smb_buf_length = smb_buffer->smb_buf_length;
+	unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
 	struct socket *ssocket = server->ssocket;
 
 	if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	else
 		smb_msg.msg_flags = MSG_NOSIGNAL;
 
-	/* smb header is converted in header_assemble. bcc and rest of SMB word
-	   area, and byte area if necessary, is converted to littleendian in
-	   cifssmb.c and RFC1001 len is converted to bigendian in smb_send
-	   Flags2 is converted in SendReceive */
-
-
 	total_len = 0;
 	for (i = 0; i < n_vec; i++)
 		total_len += iov[i].iov_len;
 
-	smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
 	cFYI(1, "Sending smb:  total_len %d", total_len);
 	dump_smb(smb_buffer, len);
 
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 
 	/* Don't want to modify the buffer as a
 	   side effect of this call. */
-	smb_buffer->smb_buf_length = smb_buf_length;
+	smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
 
 	return rc;
 }
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&server->inSend);
 #endif
-	rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&server->inSend);
 	mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
 	int resp_buf_type;
 
 	iov[0].iov_base = (char *)in_buf;
-	iov[0].iov_len = in_buf->smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
 	flags |= CIFS_NO_RESP;
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
 	cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 	int rc = 0;
 
 	/* -4 for RFC1001 length and +2 for BCC field */
-	in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4  + 2;
+	in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4  + 2);
 	in_buf->Command = SMB_COM_NT_CANCEL;
 	in_buf->WordCount = 0;
-	put_bcc_le(0, in_buf);
+	put_bcc(0, in_buf);
 
 	mutex_lock(&server->srv_mutex);
 	rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 		mutex_unlock(&server->srv_mutex);
 		return rc;
 	}
-	rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 	mutex_unlock(&server->srv_mutex);
 
 	cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		return rc;
 	}
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		rc = map_smb_to_linux_error(midQ->resp_buf,
 					    flags & CIFS_LOG_ERROR);
 
-		/* convert ByteCount if necessary */
-		if (receive_len >= sizeof(struct smb_hdr) - 4
-		    /* do not count RFC1001 header */  +
-		    (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
-			put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
 		if ((flags & CIFS_NO_RESP) == 0)
 			midQ->resp_buf = NULL;  /* mark it so buf will
 						   not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 	   to the same server. We may make this configurable later or
 	   use ses->maxReq */
 
-	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+			MAX_CIFS_HDR_SIZE - 4) {
 		cERROR(1, "Illegal length, greater than maximum frame, %d",
-			   in_buf->smb_buf_length);
+			   be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&ses->server->inSend);
 #endif
-	rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&ses->server->inSend);
 	midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 		return rc;
 	}
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 
 	if (midQ->resp_buf && out_buf
 	    && (midQ->midState == MID_RESPONSE_RECEIVED)) {
-		out_buf->smb_buf_length = receive_len;
+		out_buf->smb_buf_length = cpu_to_be32(receive_len);
 		memcpy((char *)out_buf + 4,
 		       (char *)midQ->resp_buf + 4,
 		       receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 			}
 		}
 
-		*pbytes_returned = out_buf->smb_buf_length;
+		*pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
 
 		/* BB special case reconnect tid and uid here? */
 		rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-
-		/* convert ByteCount if necessary */
-		if (receive_len >= sizeof(struct smb_hdr) - 4
-		    /* do not count RFC1001 header */  +
-		    (2 * out_buf->WordCount) + 2 /* bcc */ )
-			put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
 	} else {
 		rc = -EIO;
 		cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	   to the same server. We may make this configurable later or
 	   use ses->maxReq */
 
-	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+			MAX_CIFS_HDR_SIZE - 4) {
 		cERROR(1, "Illegal length, greater than maximum frame, %d",
-			   in_buf->smb_buf_length);
+			   be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&ses->server->inSend);
 #endif
-	rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&ses->server->inSend);
 	midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	if (rc != 0)
 		return rc;
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
 			receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 		goto out;
 	}
 
-	out_buf->smb_buf_length = receive_len;
+	out_buf->smb_buf_length = cpu_to_be32(receive_len);
 	memcpy((char *)out_buf + 4,
 	       (char *)midQ->resp_buf + 4,
 	       receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 		}
 	}
 
-	*pbytes_returned = out_buf->smb_buf_length;
+	*pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
 
 	/* BB special case reconnect tid and uid here? */
 	rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
-	/* convert ByteCount if necessary */
-	if (receive_len >= sizeof(struct smb_hdr) - 4
-	    /* do not count RFC1001 header */  +
-	    (2 * out_buf->WordCount) + 2 /* bcc */ )
-		put_bcc(get_bcc_le(out_buf), out_buf);
-
 out:
 	delete_mid(midQ);
 	if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 	struct cifsTconInfo *pTcon;
 	struct super_block *sb;
 	char *full_path;
+	struct cifs_ntsd *pacl;
 
 	if (direntry == NULL)
 		return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 		rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
 			(__u16)value_size, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+			strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+		pacl = kmalloc(value_size, GFP_KERNEL);
+		if (!pacl) {
+			cFYI(1, "%s: Can't allocate memory for ACL",
+					__func__);
+			rc = -ENOMEM;
+		} else {
+#ifdef CONFIG_CIFS_ACL
+			memcpy(pacl, ea_value, value_size);
+			rc = set_cifs_acl(pacl, value_size,
+				direntry->d_inode, full_path);
+			if (rc == 0) /* force revalidate of the inode */
+				CIFS_I(direntry->d_inode)->time = 0;
+#else
+			cFYI(1, "Set CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
+			kfree(pacl); /* freed in both configs: no leak w/o ACL */
+		}
 	} else {
 		int temp;
 		temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 3313dd19f543..9a37a9b6de3a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
 static void configfs_d_iput(struct dentry * dentry,
 			    struct inode * inode)
 {
-	struct configfs_dirent * sd = dentry->d_fsdata;
+	struct configfs_dirent *sd = dentry->d_fsdata;
 
 	if (sd) {
 		BUG_ON(sd->s_dentry != dentry);
+		/* Coordinate with configfs_readdir */
+		spin_lock(&configfs_dirent_lock);
 		sd->s_dentry = NULL;
+		spin_unlock(&configfs_dirent_lock);
 		configfs_put(sd);
 	}
 	iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
 			sd = child->d_fsdata;
 			sd->s_type |= CONFIGFS_USET_DEFAULT;
 		} else {
-			d_delete(child);
+			BUG_ON(child->d_inode);
+			d_drop(child);
 			dput(child);
 		}
 	}
@@ -1545,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
 	struct configfs_dirent *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->s_sibling;
-	ino_t ino;
+	ino_t ino = 0;
 	int i = filp->f_pos;
 
 	switch (i) {
@@ -1573,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 				struct configfs_dirent *next;
 				const char * name;
 				int len;
+				struct inode *inode = NULL;
 
 				next = list_entry(p, struct configfs_dirent,
 						   s_sibling);
@@ -1581,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 
 				name = configfs_get_name(next);
 				len = strlen(name);
-				if (next->s_dentry)
-					ino = next->s_dentry->d_inode->i_ino;
-				else
+
+				/*
+				 * We'll have a dentry and an inode for
+				 * PINNED items and for open attribute
+				 * files.  We lock here to prevent a race
+				 * with configfs_d_iput() clearing
+				 * s_dentry before calling iput().
+				 *
+				 * Why do we go to the trouble?  If
+				 * someone has an attribute file open,
+				 * the inode number should match until
+				 * they close it.  Beyond that, we don't
+				 * care.
+				 */
+				spin_lock(&configfs_dirent_lock);
+				dentry = next->s_dentry;
+				if (dentry)
+					inode = dentry->d_inode;
+				if (inode)
+					ino = inode->i_ino;
+				spin_unlock(&configfs_dirent_lock);
+				if (!inode)
 					ino = iunique(configfs_sb, 2);
 
 				if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1683,7 +1707,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 		err = configfs_attach_group(sd->s_element, &group->cg_item,
 					    dentry);
 		if (err) {
-			d_delete(dentry);
+			BUG_ON(dentry->d_inode);
+			d_drop(dentry);
 			dput(dentry);
 		} else {
 			spin_lock(&configfs_dirent_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..18b2a1f10ed8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
 #include <linux/hardirq.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
+#include <linux/prefetch.h>
 #include "internal.h"
 
 /*
@@ -99,12 +100,9 @@ static struct kmem_cache *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 
-struct dcache_hash_bucket {
-	struct hlist_bl_head head;
-};
-static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
+static inline struct hlist_bl_head *d_hash(struct dentry *parent,
 					unsigned long hash)
 {
 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -112,16 +110,6 @@ static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
 	return dentry_hashtable + (hash & D_HASHMASK);
 }
 
-static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
-{
-	bit_spin_lock(0, (unsigned long *)&b->head.first);
-}
-
-static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
-{
-	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
-}
-
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
@@ -167,8 +155,8 @@ static void d_free(struct dentry *dentry)
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
 
-	/* if dentry was never inserted into hash, immediate free is OK */
-	if (hlist_bl_unhashed(&dentry->d_hash))
+	/* if dentry was never visible to RCU, immediate free is OK */
+	if (!(dentry->d_flags & DCACHE_RCUACCESS))
 		__d_free(&dentry->d_u.d_rcu);
 	else
 		call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -330,28 +318,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
  */
 void __d_drop(struct dentry *dentry)
 {
-	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
-		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
-			bit_spin_lock(0,
-				(unsigned long *)&dentry->d_sb->s_anon.first);
-			dentry->d_flags |= DCACHE_UNHASHED;
-			hlist_bl_del_init(&dentry->d_hash);
-			__bit_spin_unlock(0,
-				(unsigned long *)&dentry->d_sb->s_anon.first);
-		} else {
-			struct dcache_hash_bucket *b;
+	if (!d_unhashed(dentry)) {
+		struct hlist_bl_head *b;
+		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+			b = &dentry->d_sb->s_anon;
+		else
 			b = d_hash(dentry->d_parent, dentry->d_name.hash);
-			spin_lock_bucket(b);
-			/*
-			 * We may not actually need to put DCACHE_UNHASHED
-			 * manipulations under the hash lock, but follow
-			 * the principle of least surprise.
-			 */
-			dentry->d_flags |= DCACHE_UNHASHED;
-			hlist_bl_del_rcu(&dentry->d_hash);
-			spin_unlock_bucket(b);
-			dentry_rcuwalk_barrier(dentry);
-		}
+
+		hlist_bl_lock(b);
+		__hlist_bl_del(&dentry->d_hash);
+		dentry->d_hash.pprev = NULL;
+		hlist_bl_unlock(b);
+
+		dentry_rcuwalk_barrier(dentry);
 	}
 }
 EXPORT_SYMBOL(__d_drop);
@@ -1304,7 +1283,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dname[name->len] = 0;
 
 	dentry->d_count = 1;
-	dentry->d_flags = DCACHE_UNHASHED;
+	dentry->d_flags = 0;
 	spin_lock_init(&dentry->d_lock);
 	seqcount_init(&dentry->d_seq);
 	dentry->d_inode = NULL;
@@ -1606,10 +1585,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	tmp->d_inode = inode;
 	tmp->d_flags |= DCACHE_DISCONNECTED;
 	list_add(&tmp->d_alias, &inode->i_dentry);
-	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
-	tmp->d_flags &= ~DCACHE_UNHASHED;
+	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
-	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
+	hlist_bl_unlock(&tmp->d_sb->s_anon);
 	spin_unlock(&tmp->d_lock);
 	spin_unlock(&inode->i_lock);
 	security_d_instantiate(tmp, inode);
@@ -1789,7 +1767,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
-	struct dcache_hash_bucket *b = d_hash(parent, hash);
+	struct hlist_bl_head *b = d_hash(parent, hash);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
@@ -1813,7 +1791,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 *
 	 * See Documentation/filesystems/path-lookup.txt for more details.
 	 */
-	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1908,7 +1886,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
-	struct dcache_hash_bucket *b = d_hash(parent, hash);
+	struct hlist_bl_head *b = d_hash(parent, hash);
 	struct hlist_bl_node *node;
 	struct dentry *found = NULL;
 	struct dentry *dentry;
@@ -1935,7 +1913,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	 */
 	rcu_read_lock();
 	
-	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		const char *tname;
 		int tlen;
 
@@ -2086,13 +2064,13 @@ again:
 }
 EXPORT_SYMBOL(d_delete);
 
-static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
+static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
 {
 	BUG_ON(!d_unhashed(entry));
-	spin_lock_bucket(b);
- 	entry->d_flags &= ~DCACHE_UNHASHED;
-	hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
-	spin_unlock_bucket(b);
+	hlist_bl_lock(b);
+	entry->d_flags |= DCACHE_RCUACCESS;
+	hlist_bl_add_head_rcu(&entry->d_hash, b);
+	hlist_bl_unlock(b);
 }
 
 static void _d_rehash(struct dentry * entry)
@@ -2131,7 +2109,7 @@ EXPORT_SYMBOL(d_rehash);
  */
 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 {
-	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 
 	spin_lock(&dentry->d_lock);
@@ -3025,7 +3003,7 @@ static void __init dcache_init_early(void)
 
 	dentry_hashtable =
 		alloc_large_system_hash("Dentry cache",
-					sizeof(struct dcache_hash_bucket),
+					sizeof(struct hlist_bl_head),
 					dhash_entries,
 					13,
 					HASH_EARLY,
@@ -3034,7 +3012,7 @@ static void __init dcache_init_early(void)
 					0);
 
 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
-		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
+		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
 
 static void __init dcache_init(void)
@@ -3057,7 +3035,7 @@ static void __init dcache_init(void)
 
 	dentry_hashtable =
 		alloc_large_system_hash("Dentry cache",
-					sizeof(struct dcache_hash_bucket),
+					sizeof(struct hlist_bl_head),
 					dhash_entries,
 					13,
 					0,
@@ -3066,7 +3044,7 @@ static void __init dcache_init(void)
 					0);
 
 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
-		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
+		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
 
 /* SLAB cache for __getname() consumers */
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,19 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
 			       size_t count, loff_t *ppos)
 {
 	char buf[32];
-	int buf_size;
+	size_t buf_size;
+	bool bv;
 	u32 *val = file->private_data;
 
 	buf_size = min(count, (sizeof(buf)-1));
 	if (copy_from_user(buf, user_buf, buf_size))
 		return -EFAULT;
 
-	switch (buf[0]) {
-	case 'y':
-	case 'Y':
-	case '1':
-		*val = 1;
-		break;
-	case 'n':
-	case 'N':
-	case '0':
-		*val = 0;
-		break;
-	}
-	
+	/* Terminate so a zero-length write never parses stale stack data */
+	buf[buf_size] = '\0';
+	if (strtobool(buf, &bv) == 0)
+		*val = bv;
+
 	return count;
 }
 
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index d2a70a4561f9..b8d5c8091024 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1452,6 +1452,25 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
 	crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 }
 
+void ecryptfs_i_size_init(const char *page_virt, struct inode *inode)
+{
+	struct ecryptfs_crypt_stat *cs =
+		&ecryptfs_inode_to_private(inode)->crypt_stat;
+	struct ecryptfs_mount_crypt_stat *mcs =
+		&ecryptfs_superblock_to_private(inode->i_sb)->mount_crypt_stat;
+	u64 size;
+
+	if (!(mcs->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)) {
+		size = get_unaligned_be64(page_virt); /* stored big-endian */
+	} else {	/* encrypted view: start from the lower inode's size */
+		size = i_size_read(ecryptfs_inode_to_lower(inode));
+		if (cs->flags & ECRYPTFS_METADATA_IN_XATTR)
+			size += cs->metadata_size;
+	}
+	i_size_write(inode, (loff_t)size);
+	cs->flags |= ECRYPTFS_I_SIZE_INITIALIZED; /* checked by callers */
+}
+
 /**
  * ecryptfs_read_headers_virt
  * @page_virt: The virtual address into which to read the headers
@@ -1482,6 +1501,8 @@ static int ecryptfs_read_headers_virt(char *page_virt,
 		rc = -EINVAL;
 		goto out;
 	}
+	if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
+		ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
 	offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
 	rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
 				    &bytes_read);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bd3cafd0949d..e70282775e2c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
 #define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800
 #define ECRYPTFS_ENCFN_USE_FEK        0x00001000
 #define ECRYPTFS_UNLINK_SIGS          0x00002000
+#define ECRYPTFS_I_SIZE_INITIALIZED   0x00004000
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
@@ -295,6 +296,8 @@ struct ecryptfs_crypt_stat {
 struct ecryptfs_inode_info {
 	struct inode vfs_inode;
 	struct inode *wii_inode;
+	struct mutex lower_file_mutex;
+	atomic_t lower_file_count;
 	struct file *lower_file;
 	struct ecryptfs_crypt_stat crypt_stat;
 };
@@ -626,6 +629,7 @@ struct ecryptfs_open_req {
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
 		       u32 flags);
+void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
 int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 					struct dentry *lower_dentry,
 					struct inode *ecryptfs_dir_inode);
@@ -757,7 +761,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
 			     struct dentry *lower_dentry,
 			     struct vfsmount *lower_mnt,
 			     const struct cred *cred);
-int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
+int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry);
+void ecryptfs_put_lower_file(struct inode *inode);
 int
 ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 			     size_t *packet_size,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index cedc913d11ba..566e5472f78c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -191,10 +191,10 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
-			"the persistent file for the dentry with name "
+			"the lower file for the dentry with name "
 			"[%s]; rc = [%d]\n", __func__,
 			ecryptfs_dentry->d_name.name, rc);
 		goto out_free;
@@ -202,9 +202,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
 	    == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
 		rc = -EPERM;
-		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+		printk(KERN_WARNING "%s: Lower file is RO; eCryptfs "
 		       "file must hence be opened RO\n", __func__);
-		goto out_free;
+		goto out_put;
 	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -232,10 +232,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				       "Plaintext passthrough mode is not "
 				       "enabled; returning -EIO\n");
 				mutex_unlock(&crypt_stat->cs_mutex);
-				goto out_free;
+				goto out_put;
 			}
 			rc = 0;
-			crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+			crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
+					       | ECRYPTFS_ENCRYPTED);
 			mutex_unlock(&crypt_stat->cs_mutex);
 			goto out;
 		}
@@ -245,6 +246,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 			"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
 			(unsigned long long)i_size_read(inode));
 	goto out;
+out_put:
+	ecryptfs_put_lower_file(inode);
 out_free:
 	kmem_cache_free(ecryptfs_file_info_cache,
 			ecryptfs_file_to_private(file));
@@ -254,17 +257,13 @@ out:
 
 static int ecryptfs_flush(struct file *file, fl_owner_t td)
 {
-	int rc = 0;
-	struct file *lower_file = NULL;
-
-	lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file->f_op && lower_file->f_op->flush)
-		rc = lower_file->f_op->flush(lower_file, td);
-	return rc;
+	return file->f_mode & FMODE_WRITE
+	       ? filemap_write_and_wait(file->f_mapping) : 0;
 }
 
 static int ecryptfs_release(struct inode *inode, struct file *file)
 {
+	ecryptfs_put_lower_file(inode);
 	kmem_cache_free(ecryptfs_file_info_cache,
 			ecryptfs_file_to_private(file));
 	return 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f99051b7adab..4d4cc6a90cd5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -168,19 +168,18 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 				"context; rc = [%d]\n", rc);
 		goto out;
 	}
-	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
-			"the persistent file for the dentry with name "
+			"the lower file for the dentry with name "
 			"[%s]; rc = [%d]\n", __func__,
 			ecryptfs_dentry->d_name.name, rc);
 		goto out;
 	}
 	rc = ecryptfs_write_metadata(ecryptfs_dentry);
-	if (rc) {
+	if (rc)
 		printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
-		goto out;
-	}
+	ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
 out:
 	return rc;
 }
@@ -226,11 +225,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 	struct dentry *lower_dir_dentry;
 	struct vfsmount *lower_mnt;
 	struct inode *lower_inode;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct ecryptfs_crypt_stat *crypt_stat;
 	char *page_virt = NULL;
-	u64 file_size;
-	int rc = 0;
+	int put_lower = 0, rc = 0;
 
 	lower_dir_dentry = lower_dentry->d_parent;
 	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
@@ -277,14 +274,15 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		rc = -ENOMEM;
 		goto out;
 	}
-	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	rc = ecryptfs_get_lower_file(ecryptfs_dentry);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
-			"the persistent file for the dentry with name "
+			"the lower file for the dentry with name "
 			"[%s]; rc = [%d]\n", __func__,
 			ecryptfs_dentry->d_name.name, rc);
 		goto out_free_kmem;
 	}
+	put_lower = 1;
 	crypt_stat = &ecryptfs_inode_to_private(
 					ecryptfs_dentry->d_inode)->crypt_stat;
 	/* TODO: lock for crypt_stat comparison */
@@ -302,18 +300,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		}
 		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
 	}
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-		ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
-		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-			file_size = (crypt_stat->metadata_size
-				     + i_size_read(lower_dentry->d_inode));
-		else
-			file_size = i_size_read(lower_dentry->d_inode);
-	} else {
-		file_size = get_unaligned_be64(page_virt);
-	}
-	i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
+	ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
 out_free_kmem:
 	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
 	goto out;
@@ -322,6 +309,8 @@ out_put:
 	mntput(lower_mnt);
 	d_drop(ecryptfs_dentry);
 out:
+	if (put_lower)
+		ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
 	return rc;
 }
 
@@ -538,8 +527,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 	dget(lower_dentry);
 	rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
 	dput(lower_dentry);
-	if (!rc)
-		d_delete(lower_dentry);
 	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
 	dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
 	unlock_dir(lower_dir_dentry);
@@ -610,8 +597,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
-	dput(lower_new_dentry->d_parent);
-	dput(lower_old_dentry->d_parent);
+	dput(lower_new_dir_dentry);
+	dput(lower_old_dir_dentry);
 	dput(lower_new_dentry);
 	dput(lower_old_dentry);
 	return rc;
@@ -759,8 +746,11 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 
 	if (unlikely((ia->ia_size == i_size))) {
 		lower_ia->ia_valid &= ~ATTR_SIZE;
-		goto out;
+		return 0;
 	}
+	rc = ecryptfs_get_lower_file(dentry);
+	if (rc)
+		return rc;
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	/* Switch on growing or shrinking file */
 	if (ia->ia_size > i_size) {
@@ -838,6 +828,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 			lower_ia->ia_valid &= ~ATTR_SIZE;
 	}
 out:
+	ecryptfs_put_lower_file(inode);
 	return rc;
 }
 
@@ -913,7 +904,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 
 		mount_crypt_stat = &ecryptfs_superblock_to_private(
 			dentry->d_sb)->mount_crypt_stat;
+		rc = ecryptfs_get_lower_file(dentry);
+		if (rc) {
+			mutex_unlock(&crypt_stat->cs_mutex);
+			goto out;
+		}
 		rc = ecryptfs_read_metadata(dentry);
+		ecryptfs_put_lower_file(inode);
 		if (rc) {
 			if (!(mount_crypt_stat->flags
 			      & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
@@ -927,10 +924,17 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 				goto out;
 			}
 			rc = 0;
-			crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+			crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
+					       | ECRYPTFS_ENCRYPTED);
 		}
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
+	if (S_ISREG(inode->i_mode)) {
+		rc = filemap_write_and_wait(inode->i_mapping);
+		if (rc)
+			goto out;
+		fsstack_copy_attr_all(inode, lower_inode);
+	}
 	memcpy(&lower_ia, ia, sizeof(lower_ia));
 	if (ia->ia_valid & ATTR_FILE)
 		lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0851ab6980f5..69f994a7d524 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -44,7 +44,7 @@ static struct task_struct *ecryptfs_kthread;
  * @ignored: ignored
  *
  * The eCryptfs kernel thread that has the responsibility of getting
- * the lower persistent file with RW permissions.
+ * the lower file with RW permissions.
  *
  * Returns zero on success; non-zero otherwise
  */
@@ -141,8 +141,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	int rc = 0;
 
 	/* Corresponding dput() and mntput() are done when the
-	 * persistent file is fput() when the eCryptfs inode is
-	 * destroyed. */
+	 * lower file is fput() when all eCryptfs files for the inode are
+	 * released. */
 	dget(lower_dentry);
 	mntget(lower_mnt);
 	flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index fdb2eb0ad09e..89b93389af8e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -96,7 +96,7 @@ void __ecryptfs_printk(const char *fmt, ...)
 }
 
 /**
- * ecryptfs_init_persistent_file
+ * ecryptfs_init_lower_file
  * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
  *                   the lower dentry and the lower mount set
  *
@@ -104,42 +104,70 @@ void __ecryptfs_printk(const char *fmt, ...)
  * inode. All I/O operations to the lower inode occur through that
  * file. When the first eCryptfs dentry that interposes with the first
  * lower dentry for that inode is created, this function creates the
- * persistent file struct and associates it with the eCryptfs
- * inode. When the eCryptfs inode is destroyed, the file is closed.
+ * lower file struct and associates it with the eCryptfs
+ * inode. When all eCryptfs files associated with the inode are released, the
+ * file is closed.
  *
- * The persistent file will be opened with read/write permissions, if
+ * The lower file will be opened with read/write permissions, if
  * possible. Otherwise, it is opened read-only.
  *
- * This function does nothing if a lower persistent file is already
+ * This function does nothing if a lower file is already
  * associated with the eCryptfs inode.
  *
  * Returns zero on success; non-zero otherwise
  */
-int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
+static int ecryptfs_init_lower_file(struct dentry *dentry,
+				    struct file **lower_file)
 {
 	const struct cred *cred = current_cred();
-	struct ecryptfs_inode_info *inode_info =
-		ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
-	int rc = 0;
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+	int rc;
 
-	if (!inode_info->lower_file) {
-		struct dentry *lower_dentry;
-		struct vfsmount *lower_mnt =
-			ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
+	rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt,
+				      cred);
+	if (rc) {
+		printk(KERN_ERR "Error opening lower file "
+		       "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
+		       "rc = [%d]\n", lower_dentry, lower_mnt, rc);
+		(*lower_file) = NULL;
+	}
+	return rc;
+}
 
-		lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-		rc = ecryptfs_privileged_open(&inode_info->lower_file,
-					      lower_dentry, lower_mnt, cred);
-		if (rc) {
-			printk(KERN_ERR "Error opening lower persistent file "
-			       "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
-			       "rc = [%d]\n", lower_dentry, lower_mnt, rc);
-			inode_info->lower_file = NULL;
-		}
+int ecryptfs_get_lower_file(struct dentry *dentry)
+{
+	struct ecryptfs_inode_info *info;
+	int users, rc = 0;
+
+	info = ecryptfs_inode_to_private(dentry->d_inode);
+	/* Count and open under the mutex so both change together */
+	mutex_lock(&info->lower_file_mutex);
+	users = atomic_inc_return(&info->lower_file_count);
+	if (WARN_ON_ONCE(users < 1)) {
+		rc = -EINVAL;
+	} else if (users == 1) {	/* first user opens the lower file */
+		rc = ecryptfs_init_lower_file(dentry, &info->lower_file);
+		if (rc)		/* open failed: reset the count to zero */
+			atomic_set(&info->lower_file_count, 0);
 	}
+	mutex_unlock(&info->lower_file_mutex);
 	return rc;
 }
 
+void ecryptfs_put_lower_file(struct inode *inode)
+{
+	struct ecryptfs_inode_info *info = ecryptfs_inode_to_private(inode);
+
+	/* Nothing to release unless this call dropped the count to zero */
+	if (!atomic_dec_and_mutex_lock(&info->lower_file_count,
+				       &info->lower_file_mutex))
+		return;
+	fput(info->lower_file);	/* last user: drop the lower file */
+	info->lower_file = NULL;
+	mutex_unlock(&info->lower_file_mutex);
+}
+
 static struct inode *ecryptfs_get_inode(struct inode *lower_inode,
 		       struct super_block *sb)
 {
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index bacc882e1ae4..245b517bf1b6 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -55,6 +55,8 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 	if (unlikely(!inode_info))
 		goto out;
 	ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
+	mutex_init(&inode_info->lower_file_mutex);
+	atomic_set(&inode_info->lower_file_count, 0);
 	inode_info->lower_file = NULL;
 	inode = &inode_info->vfs_inode;
 out:
@@ -77,8 +79,7 @@ static void ecryptfs_i_callback(struct rcu_head *head)
  *
  * This is used during the final destruction of the inode.  All
  * allocation of memory related to the inode, including allocated
- * memory in the crypt_stat struct, will be released here. This
- * function also fput()'s the persistent file for the lower inode.
+ * memory in the crypt_stat struct, will be released here.
  * There should be no chance that this deallocation will be missed.
  */
 static void ecryptfs_destroy_inode(struct inode *inode)
@@ -86,16 +87,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 	struct ecryptfs_inode_info *inode_info;
 
 	inode_info = ecryptfs_inode_to_private(inode);
-	if (inode_info->lower_file) {
-		struct dentry *lower_dentry =
-			inode_info->lower_file->f_dentry;
-
-		BUG_ON(!lower_dentry);
-		if (lower_dentry->d_inode) {
-			fput(inode_info->lower_file);
-			inode_info->lower_file = NULL;
-		}
-	}
+	BUG_ON(inode_info->lower_file);
 	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
 	call_rcu(&inode->i_rcu, ecryptfs_i_callback);
 }
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..8328beb9016f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1659,6 +1659,7 @@ static int zap_process(struct task_struct *start, int exit_code)
 
 	t = start;
 	do {
+		task_clear_group_stop_pending(t);
 		if (t != current && t->mm) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index e25e99bf7ee1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
 
 #ifdef CONFIG_QUOTA
 /* Amount of blocks needed for quota update - we know that the structure was
- * allocated so we need to update only inode+data */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
+ * allocated so we need to update only the data block */
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
 /* Amount of blocks needed for quota insert/delete - we do some block writes
  * but inode, sb and group updates are done only once */
 #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 4673bc05274f..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
  * the parent directory's parent as well, and so on recursively, if
  * they are also freshly created.
  */
-static void ext4_sync_parent(struct inode *inode)
+static int ext4_sync_parent(struct inode *inode)
 {
+	struct writeback_control wbc;
 	struct dentry *dentry = NULL;
+	int ret = 0;
 
 	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
 		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
 			break;
 		inode = dentry->d_parent->d_inode;
-		sync_mapping_buffers(inode->i_mapping);
+		ret = sync_mapping_buffers(inode->i_mapping);
+		if (ret)
+			break;
+		memset(&wbc, 0, sizeof(wbc));
+		wbc.sync_mode = WB_SYNC_ALL;
+		wbc.nr_to_write = 0;         /* only write out the inode */
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			break;
 	}
+	return ret;
 }
 
 /*
@@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)
 	if (!journal) {
 		ret = generic_file_fsync(file, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
-			ext4_sync_parent(inode);
+			ret = ext4_sync_parent(inode);
 		goto out;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ad8e303c0d29..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		 * for partial write.
 		 */
 		set_buffer_new(bh);
+		set_buffer_mapped(bh);
 	}
 	return 0;
 }
@@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)
 	Indirect chain[4];
 	Indirect *partial;
 	__le32 nr = 0;
-	int n;
-	ext4_lblk_t last_block;
+	int n = 0;
+	ext4_lblk_t last_block, max_block;
 	unsigned blocksize = inode->i_sb->s_blocksize;
 
 	trace_ext4_truncate_enter(inode);
@@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
 	if (inode->i_size & (blocksize - 1))
 		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
 			goto out_stop;
 
-	n = ext4_block_to_path(inode, last_block, offsets, NULL);
-	if (n == 0)
-		goto out_stop;	/* error */
+	if (last_block != max_block) {
+		n = ext4_block_to_path(inode, last_block, offsets, NULL);
+		if (n == 0)
+			goto out_stop;	/* error */
+	}
 
 	/*
 	 * OK.  This truncate is going to happen.  We add the inode to the
@@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
 	 */
 	ei->i_disksize = inode->i_size;
 
-	if (n == 1) {		/* direct blocks */
+	if (last_block == max_block) {
+		/*
+		 * It is unnecessary to free any data blocks if last_block is
+		 * equal to the indirect block limit.
+		 */
+		goto out_unlock;
+	} else if (n == 1) {		/* direct blocks */
 		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
 			       i_data + EXT4_NDIR_BLOCKS);
 		goto do_indirects;
@@ -4553,6 +4564,7 @@ do_indirects:
 		;
 	}
 
+out_unlock:
 	up_write(&ei->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
@@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 	/* if nrblocks are contiguous */
 	if (chunk) {
 		/*
-		 * With N contiguous data blocks, it need at most
-		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
-		 * 2 dindirect blocks
-		 * 1 tindirect block
+		 * With N contiguous data blocks, we need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+		 * 2 dindirect blocks, and 1 tindirect block
 		 */
-		indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
-		return indirects + 3;
+		return DIV_ROUND_UP(nrblocks,
+				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
 	}
 	/*
 	 * if nrblocks are not contiguous, worse case, each block touch
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 056474b7b8e0..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
  * journal_end calls result in the superblock being marked dirty, so
  * that sync() will call the filesystem's write_super callback if
  * appropriate.
+ *
+ * To avoid holding j_barrier while in userspace when a user calls freeze(),
+ * ext4 prevents a new handle from being started by checking s_frozen,
+ * which is in an upper layer.
  */
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
 	journal_t *journal;
+	handle_t  *handle;
 
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);
 
-	vfs_check_frozen(sb, SB_FREEZE_TRANS);
-	/* Special case here: if the journal has aborted behind our
-	 * backs (eg. EIO in the commit thread), then we still need to
-	 * take the FS itself readonly cleanly. */
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		if (is_journal_aborted(journal)) {
-			ext4_abort(sb, "Detected aborted journal");
-			return ERR_PTR(-EROFS);
-		}
-		return jbd2_journal_start(journal, nblocks);
+	handle = ext4_journal_current_handle();
+
+	/*
+	 * If a handle has been started, it should be allowed to
+	 * finish, otherwise deadlock could happen between freeze
+	 * and others (e.g. truncate) due to the restart of the
+	 * journal handle if the filesystem is frozen and active
+	 * handles are not stopped.
+	 */
+	if (!handle)
+		vfs_check_frozen(sb, SB_FREEZE_TRANS);
+
+	if (!journal)
+		return ext4_get_nojournal();
+	/*
+	 * Special case here: if the journal has aborted behind our
+	 * backs (eg. EIO in the commit thread), then we still need to
+	 * take the FS itself readonly cleanly.
+	 */
+	if (is_journal_aborted(journal)) {
+		ext4_abort(sb, "Detected aborted journal");
+		return ERR_PTR(-EROFS);
 	}
-	return ext4_get_nojournal();
+	return jbd2_journal_start(journal, nblocks);
 }
 
 /*
@@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
 	mutex_unlock(&ext4_li_info->li_list_mtx);
 
 	sbi->s_li_request = elr;
+	/*
+	 * set elr to NULL here since it has been inserted to
+	 * the request_list and the removal and free of it is
+	 * handled by ext4_clear_request_list from now on.
+	 */
+	elr = NULL;
 
 	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
 		ret = ext4_run_lazyinit_thread();
@@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 
+	init_timer(&sbi->s_err_report);
+	sbi->s_err_report.function = print_daily_error_info;
+	sbi->s_err_report.data = (unsigned long) sb;
+
 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
 			ext4_count_free_blocks(sb));
 	if (!err) {
@@ -3646,9 +3673,6 @@ no_journal:
 		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
 		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
 
-	init_timer(&sbi->s_err_report);
-	sbi->s_err_report.function = print_daily_error_info;
-	sbi->s_err_report.data = (unsigned long) sb;
 	if (es->s_error_count)
 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
 
@@ -3672,6 +3696,7 @@ failed_mount_wq:
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	del_timer(&sbi->s_err_report);
 	if (sbi->s_flex_groups) {
 		if (is_vmalloc_addr(sbi->s_flex_groups))
 			vfree(sbi->s_flex_groups);
@@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 /*
  * LVM calls this function before a (read-only) snapshot is created.  This
  * gives us a chance to flush the journal completely and mark the fs clean.
+ *
+ * Note that this function cannot bring a filesystem to a clean state on its
+ * own, because ext4 prevents a new handle from being started by checking
+ * @sb->s_frozen, which lives in an upper layer.  It thus needs help from
+ * the upper layer.
  */
 static int ext4_freeze(struct super_block *sb)
 {
@@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 static int ext4_quota_off(struct super_block *sb, int type)
 {
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	handle_t *handle;
+
 	/* Force all delayed allocation blocks to be allocated.
 	 * Caller already holds s_umount sem */
 	if (test_opt(sb, DELALLOC))
 		sync_filesystem(sb);
 
+	/* Update modification times of quota files when userspace can
+	 * start looking at them */
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle))
+		goto out;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+out:
 	return dquot_quota_off(sb, type);
 }
 
@@ -4714,9 +4757,8 @@ out:
 	if (inode->i_size < off + len) {
 		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
+		ext4_mark_inode_dirty(handle, inode);
 	}
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	ext4_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
 	return len;
 }
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
 #include <linux/exportfs.h>
 #include <linux/fs_struct.h>
 #include <linux/fsnotify.h>
+#include <linux/personality.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/file.c b/fs/file.c
index 0be344755c02..4c6992d8f3ba 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/mmzone.h>
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -39,14 +40,17 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
  */
 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
-static inline void *alloc_fdmem(unsigned int size)
+static void *alloc_fdmem(unsigned int size)
 {
-	void *data;
-
-	data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
-	if (data != NULL)
-		return data;
-
+	/*
+	 * Very large allocations can stress page reclaim, so fall back to
+	 * vmalloc() if the allocation size will be considered "large" by the VM.
+	 */
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+		if (data != NULL)
+			return data;
+	}
 	return vmalloc(size);
 }
 
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
 			*tmp = fs->next;
 			fs->next = NULL;
 			write_unlock(&file_systems_lock);
+			synchronize_rcu();
 			return 0;
 		}
 		tmp = &(*tmp)->next;
 	}
 	write_unlock(&file_systems_lock);
 
-	synchronize_rcu();
-
 	return -EINVAL;
 }
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c6ba49bd95b3..b32eb29a4e6f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		if (!inode)
 			return 0;
 
-		if (nd->flags & LOOKUP_RCU)
+		if (nd && (nd->flags & LOOKUP_RCU))
 			return -ECHILD;
 
 		fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
 ccflags-y := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
-	glops.o inode.o log.o lops.o main.o meta_io.o \
+	glops.o log.o lops.o main.o meta_io.o \
 	aops.o dentry.o export.o file.o \
-	ops_fstype.o ops_inode.o quota.o \
+	ops_fstype.o inode.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c71995b111bf..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	}
 
 	brelse(dibh);
-	gfs2_trans_end(sdp);
 failed:
+	gfs2_trans_end(sdp);
 	if (al) {
 		gfs2_inplace_release(ip);
 		gfs2_quota_unlock(ip);
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 		bd = bh->b_private;
 		if (bd && bd->bd_ail)
 			goto cannot_release;
-		gfs2_assert_warn(sdp, !buffer_pinned(bh));
-		gfs2_assert_warn(sdp, !buffer_dirty(bh));
+		if (buffer_pinned(bh) || buffer_dirty(bh))
+			goto not_possible;
 		bh = bh->b_this_page;
 	} while(bh != head);
 	gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 	} while (bh != head);
 
 	return try_to_free_buffers(page);
+
+not_possible: /* Should never happen */
+	WARN_ON(buffer_dirty(bh));
+	WARN_ON(buffer_pinned(bh));
 cannot_release:
 	gfs2_log_unlock(sdp);
 	return 0;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5c356d09c321..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
 struct qstr gfs2_qdot __read_mostly;
 struct qstr gfs2_qdotdot __read_mostly;
 
-typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
-			    u64 leaf_no, void *data);
 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
 			    const struct qstr *name, void *opaque);
 
-
 int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
 			    struct buffer_head **bhp)
 {
@@ -1506,7 +1503,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
 		inode = gfs2_inode_lookup(dir->i_sb, 
 				be16_to_cpu(dent->de_type),
 				be64_to_cpu(dent->de_inum.no_addr),
-				be64_to_cpu(dent->de_inum.no_formal_ino));
+				be64_to_cpu(dent->de_inum.no_formal_ino), 0);
 		brelse(bh);
 		return inode;
 	}
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
  */
 
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-		 const struct gfs2_inode *nip, unsigned type)
+		 const struct gfs2_inode *nip)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 				return PTR_ERR(dent);
 			dent = gfs2_init_dirent(inode, dent, name, bh);
 			gfs2_inum_out(nip, dent);
-			dent->de_type = cpu_to_be16(type);
+			dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
 			if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
 				be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_entries++;
 			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+			if (S_ISDIR(nip->i_inode.i_mode))
+				inc_nlink(&ip->i_inode);
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
  * Returns: 0 on success, error code on failure
  */
 
-int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
+int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 {
+	const struct qstr *name = &dentry->d_name;
 	struct gfs2_dirent *dent, *prev = NULL;
 	struct buffer_head *bh;
 	int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_entries--;
 	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
+	if (S_ISDIR(dentry->d_inode->i_mode))
+		drop_nlink(&dip->i_inode);
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 }
 
 /**
- * foreach_leaf - call a function for each leaf in a directory
- * @dip: the directory
- * @lc: the function to call for each each
- * @data: private data to pass to it
- *
- * Returns: errno
- */
-
-static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct buffer_head *bh;
-	struct gfs2_leaf *leaf;
-	u32 hsize, len;
-	u32 ht_offset, lp_offset, ht_offset_cur = -1;
-	u32 index = 0;
-	__be64 *lp;
-	u64 leaf_no;
-	int error = 0;
-
-	hsize = 1 << dip->i_depth;
-	if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
-		gfs2_consist_inode(dip);
-		return -EIO;
-	}
-
-	lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
-	if (!lp)
-		return -ENOMEM;
-
-	while (index < hsize) {
-		lp_offset = index & (sdp->sd_hash_ptrs - 1);
-		ht_offset = index - lp_offset;
-
-		if (ht_offset_cur != ht_offset) {
-			error = gfs2_dir_read_data(dip, (char *)lp,
-						ht_offset * sizeof(__be64),
-						sdp->sd_hash_bsize, 1);
-			if (error != sdp->sd_hash_bsize) {
-				if (error >= 0)
-					error = -EIO;
-				goto out;
-			}
-			ht_offset_cur = ht_offset;
-		}
-
-		leaf_no = be64_to_cpu(lp[lp_offset]);
-		if (leaf_no) {
-			error = get_leaf(dip, leaf_no, &bh);
-			if (error)
-				goto out;
-			leaf = (struct gfs2_leaf *)bh->b_data;
-			len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
-			brelse(bh);
-
-			error = lc(dip, index, len, leaf_no, data);
-			if (error)
-				goto out;
-
-			index = (index & ~(len - 1)) + len;
-		} else
-			index++;
-	}
-
-	if (index != hsize) {
-		gfs2_consist_inode(dip);
-		error = -EIO;
-	}
-
-out:
-	kfree(lp);
-
-	return error;
-}
-
-/**
  * leaf_dealloc - Deallocate a directory leaf
  * @dip: the directory
  * @index: the hash table offset in the directory
  * @len: the number of pointers to this leaf
  * @leaf_no: the leaf number
- * @data: not used
+ * @leaf_bh: buffer_head for the starting leaf
+ * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0
  *
  * Returns: errno
  */
 
 static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
-			u64 leaf_no, void *data)
+			u64 leaf_no, struct buffer_head *leaf_bh,
+			int last_dealloc)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		goto out_qs;
 
 	/*  Count the number of leaves  */
+	bh = leaf_bh;
 
 	for (blk = leaf_no; blk; blk = nblk) {
-		error = get_leaf(dip, blk, &bh);
-		if (error)
-			goto out_rlist;
+		if (blk != leaf_no) {
+			error = get_leaf(dip, blk, &bh);
+			if (error)
+				goto out_rlist;
+		}
 		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
 		nblk = be64_to_cpu(tmp_leaf->lf_next);
-		brelse(bh);
+		if (blk != leaf_no)
+			brelse(bh);
 
 		gfs2_rlist_add(sdp, &rlist, blk);
 		l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 	if (error)
 		goto out_rg_gunlock;
 
+	bh = leaf_bh;
+
 	for (blk = leaf_no; blk; blk = nblk) {
-		error = get_leaf(dip, blk, &bh);
-		if (error)
-			goto out_end_trans;
+		if (blk != leaf_no) {
+			error = get_leaf(dip, blk, &bh);
+			if (error)
+				goto out_end_trans;
+		}
 		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
 		nblk = be64_to_cpu(tmp_leaf->lf_next);
-		brelse(bh);
+		if (blk != leaf_no)
+			brelse(bh);
 
 		gfs2_free_meta(dip, blk, 1);
 		gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		goto out_end_trans;
 
 	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
+	/* On the last dealloc, make this a regular file in case we crash.
+	   (We don't want to free these blocks a second time.)  */
+	if (last_dealloc)
+		dip->i_inode.i_mode = S_IFREG;
 	gfs2_dinode_out(dip, dibh->b_data);
 	brelse(dibh);
 
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct buffer_head *bh;
-	int error;
+	struct gfs2_leaf *leaf;
+	u32 hsize, len;
+	u32 ht_offset, lp_offset, ht_offset_cur = -1;
+	u32 index = 0, next_index;
+	__be64 *lp;
+	u64 leaf_no;
+	int error = 0, last;
 
-	/* Dealloc on-disk leaves to FREEMETA state */
-	error = foreach_leaf(dip, leaf_dealloc, NULL);
-	if (error)
-		return error;
+	hsize = 1 << dip->i_depth;
+	if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
+		gfs2_consist_inode(dip);
+		return -EIO;
+	}
 
-	/* Make this a regular file in case we crash.
-	   (We don't want to free these blocks a second time.)  */
+	lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
+	if (!lp)
+		return -ENOMEM;
 
-	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
-	if (error)
-		return error;
+	while (index < hsize) {
+		lp_offset = index & (sdp->sd_hash_ptrs - 1);
+		ht_offset = index - lp_offset;
 
-	error = gfs2_meta_inode_buffer(dip, &bh);
-	if (!error) {
-		gfs2_trans_add_bh(dip->i_gl, bh, 1);
-		((struct gfs2_dinode *)bh->b_data)->di_mode =
-						cpu_to_be32(S_IFREG);
-		brelse(bh);
+		if (ht_offset_cur != ht_offset) {
+			error = gfs2_dir_read_data(dip, (char *)lp,
+						ht_offset * sizeof(__be64),
+						sdp->sd_hash_bsize, 1);
+			if (error != sdp->sd_hash_bsize) {
+				if (error >= 0)
+					error = -EIO;
+				goto out;
+			}
+			ht_offset_cur = ht_offset;
+		}
+
+		leaf_no = be64_to_cpu(lp[lp_offset]);
+		if (leaf_no) {
+			error = get_leaf(dip, leaf_no, &bh);
+			if (error)
+				goto out;
+			leaf = (struct gfs2_leaf *)bh->b_data;
+			len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+
+			next_index = (index & ~(len - 1)) + len;
+			last = ((next_index >= hsize) ? 1 : 0);
+			error = leaf_dealloc(dip, index, len, leaf_no, bh,
+					     last);
+			brelse(bh);
+			if (error)
+				goto out;
+			index = next_index;
+		} else
+			index++;
 	}
 
-	gfs2_trans_end(sdp);
+	if (index != hsize) {
+		gfs2_consist_inode(dip);
+		error = -EIO;
+	}
+
+out:
+	kfree(lp);
 
 	return error;
 }
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
 extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
 			  const struct gfs2_inode *ip);
 extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-			const struct gfs2_inode *ip, unsigned int type);
-extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
+			const struct gfs2_inode *ip);
+extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
 extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
 			 filldir_t filldir);
 extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct inode *inode;
 
-	inode = gfs2_ilookup(sb, inum->no_addr);
+	inode = gfs2_ilookup(sb, inum->no_addr, 0);
 	if (inode) {
 		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
 			iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b2682e073eee..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
 /**
  * gfs2_fsync - sync the dirty data for a file (across the cluster)
  * @file: the file that points to the dentry (we ignore this)
- * @dentry: the dentry that points to the inode to sync
+ * @datasync: set if we can ignore timestamp changes
  *
- * The VFS will flush "normal" data for us. We only need to worry
- * about metadata here. For journaled data, we just do a log flush
- * as we can't avoid it. Otherwise we can just bale out if datasync
- * is set. For stuffed inodes we must flush the log in order to
- * ensure that all data is on disk.
- *
- * The call to write_inode_now() is there to write back metadata and
- * the inode itself. It does also try and write the data, but thats
- * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
- * for us.
+ * The VFS will flush data for us. We only need to worry
+ * about metadata here.
  *
  * Returns: errno
  */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
-	int ret = 0;
-
-	if (gfs2_is_jdata(GFS2_I(inode))) {
-		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-		return 0;
-	}
+	struct gfs2_inode *ip = GFS2_I(inode);
+	int ret;
 
-	if (sync_state != 0) {
-		if (!datasync)
-			ret = write_inode_now(inode, 0);
+	if (datasync)
+		sync_state &= ~I_DIRTY_SYNC;
 
-		if (gfs2_is_stuffed(GFS2_I(inode)))
-			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+	if (sync_state) {
+		ret = sync_inode_metadata(inode, 1);
+		if (ret)
+			return ret;
+		gfs2_ail_flush(ip->i_gl);
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -617,18 +607,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-			   unsigned to)
+static int empty_write_end(struct page *page, unsigned from,
+			   unsigned to, int mode)
 {
-	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct buffer_head *bh;
+	unsigned offset, blksize = 1 << inode->i_blkbits;
+	pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
 
 	zero_user(page, from, to-from);
 	mark_page_accessed(page);
 
-	if (!gfs2_is_writeback(ip))
-		gfs2_page_add_databufs(ip, page, from, to);
+	if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
+		if (!gfs2_is_writeback(ip))
+			gfs2_page_add_databufs(ip, page, from, to);
 
-	block_commit_write(page, from, to);
+		block_commit_write(page, from, to);
+		return 0;
+	}
+
+	offset = 0;
+	bh = page_buffers(page);
+	while (offset < to) {
+		if (offset >= from) {
+			set_buffer_uptodate(bh);
+			mark_buffer_dirty(bh);
+			clear_buffer_new(bh);
+			write_dirty_buffer(bh, WRITE);
+		}
+		offset += blksize;
+		bh = bh->b_this_page;
+	}
+
+	offset = 0;
+	bh = page_buffers(page);
+	while (offset < to) {
+		if (offset >= from) {
+			wait_on_buffer(bh);
+			if (!buffer_uptodate(bh))
+				return -EIO;
+		}
+		offset += blksize;
+		bh = bh->b_this_page;
+	}
+	return 0;
 }
 
 static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +666,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
 	return !buffer_mapped(&bh_map);
 }
 
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
+			      int mode)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned start, end, next, blksize;
@@ -668,7 +692,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 							  gfs2_block_map);
 				if (unlikely(ret))
 					return ret;
-				empty_write_end(page, start, end);
+				ret = empty_write_end(page, start, end, mode);
+				if (unlikely(ret))
+					return ret;
 				end = 0;
 			}
 			start = next;
@@ -682,7 +708,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 		ret = __block_write_begin(page, start, end - start, gfs2_block_map);
 		if (unlikely(ret))
 			return ret;
-		empty_write_end(page, start, end);
+		ret = empty_write_end(page, start, end, mode);
+		if (unlikely(ret))
+			return ret;
 	}
 
 	return 0;
@@ -731,7 +759,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 
 		if (curr == end)
 			to = end_offset;
-		error = write_empty_blocks(page, from, to);
+		error = write_empty_blocks(page, from, to, mode);
 		if (!error && offset + to > inode->i_size &&
 		    !(mode & FALLOC_FL_KEEP_SIZE)) {
 			i_size_write(inode, offset + to);
@@ -788,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	loff_t bytes, max_bytes;
 	struct gfs2_alloc *al;
 	int error;
+	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
 	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
 	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
 
@@ -795,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (mode & ~FALLOC_FL_KEEP_SIZE)
 		return -EOPNOTSUPP;
 
-	offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
-		 sdp->sd_sb.sb_bsize_shift;
+	offset &= bsize_mask;
 
 	len = next - offset;
 	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
 	if (!bytes)
 		bytes = UINT_MAX;
+	bytes &= bsize_mask;
+	if (bytes == 0)
+		bytes = sdp->sd_sb.sb_bsize;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
 	error = gfs2_glock_nq(&ip->i_gh);
@@ -832,6 +863,9 @@ retry:
 		if (error) {
 			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
 				bytes >>= 1;
+				bytes &= bsize_mask;
+				if (bytes == 0)
+					bytes = sdp->sd_sb.sb_bsize;
 				goto retry;
 			}
 			goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f07643e21bfa..a2a6abbccc07 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
 
 static inline void spin_lock_bucket(unsigned int hash)
 {
-	struct hlist_bl_head *bl = &gl_hash_table[hash];
-	bit_spin_lock(0, (unsigned long *)bl);
+	hlist_bl_lock(&gl_hash_table[hash]);
 }
 
 static inline void spin_unlock_bucket(unsigned int hash)
 {
-	struct hlist_bl_head *bl = &gl_hash_table[hash];
-	__bit_spin_unlock(0, (unsigned long *)bl);
+	hlist_bl_unlock(&gl_hash_table[hash]);
 }
 
 static void gfs2_glock_dealloc(struct rcu_head *rcu)
@@ -145,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 
-	/* assert_spin_locked(&gl->gl_spin); */
-
 	if (gl->gl_state == LM_ST_UNLOCKED)
 		return 0;
-	if (test_bit(GLF_LFLUSH, &gl->gl_flags))
-		return 0;
-	if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
-	    !list_empty(&gl->gl_holders))
+	if (!list_empty(&gl->gl_holders))
 		return 0;
 	if (glops->go_demote_ok)
 		return glops->go_demote_ok(gl);
@@ -160,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
 }
 
 
+void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
+{
+	spin_lock(&lru_lock);
+
+	if (!list_empty(&gl->gl_lru))
+		list_del_init(&gl->gl_lru);
+	else
+		atomic_inc(&lru_count);
+
+	list_add_tail(&gl->gl_lru, &lru_list);
+	set_bit(GLF_LRU, &gl->gl_flags);
+	spin_unlock(&lru_lock);
+}
+
+static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+{
+	spin_lock(&lru_lock);
+	if (!list_empty(&gl->gl_lru)) {
+		list_del_init(&gl->gl_lru);
+		atomic_dec(&lru_count);
+		clear_bit(GLF_LRU, &gl->gl_flags);
+	}
+	spin_unlock(&lru_lock);
+}
+
 /**
  * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
  * @gl: the glock
@@ -170,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
 
 static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
 {
-	if (demote_ok(gl)) {
-		spin_lock(&lru_lock);
-
-		if (!list_empty(&gl->gl_lru))
-			list_del_init(&gl->gl_lru);
-		else
-			atomic_inc(&lru_count);
-
-		list_add_tail(&gl->gl_lru, &lru_list);
-		spin_unlock(&lru_lock);
-	}
-}
-
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
-	spin_lock(&gl->gl_spin);
-	__gfs2_glock_schedule_for_reclaim(gl);
-	spin_unlock(&gl->gl_spin);
+	if (demote_ok(gl))
+		gfs2_glock_add_to_lru(gl);
 }
 
 /**
@@ -219,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 		spin_lock_bucket(gl->gl_hash);
 		hlist_bl_del_rcu(&gl->gl_list);
 		spin_unlock_bucket(gl->gl_hash);
-		spin_lock(&lru_lock);
-		if (!list_empty(&gl->gl_lru)) {
-			list_del_init(&gl->gl_lru);
-			atomic_dec(&lru_count);
-		}
-		spin_unlock(&lru_lock);
+		gfs2_glock_remove_from_lru(gl);
 		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
 		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
 		trace_gfs2_glock_put(gl);
@@ -544,11 +541,6 @@ __acquires(&gl->gl_spin)
 	clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 
 	gfs2_glock_hold(gl);
-	if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
-	    gl->gl_state == LM_ST_DEFERRED) &&
-	    !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
-		lck_flags |= LM_FLAG_TRY_1CB;
-
 	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
 		/* lock_dlm */
 		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -650,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
 	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
 
 	if (ip)
-		inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
+		inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
 	else
 		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
 	if (inode && !IS_ERR(inode)) {
@@ -1027,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
 		return -EIO;
 
+	if (test_bit(GLF_LRU, &gl->gl_flags))
+		gfs2_glock_remove_from_lru(gl);
+
 	spin_lock(&gl->gl_spin);
 	add_to_queue(gh);
 	if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1084,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
-	__gfs2_glock_schedule_for_reclaim(gl);
+	if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
+		__gfs2_glock_schedule_for_reclaim(gl);
 	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
@@ -1367,6 +1363,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
 	while(nr && !list_empty(&lru_list)) {
 		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
 		list_del_init(&gl->gl_lru);
+		clear_bit(GLF_LRU, &gl->gl_flags);
 		atomic_dec(&lru_count);
 
 		/* Test for being demotable */
@@ -1389,6 +1386,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
 		}
 		nr_skipped++;
 		list_add(&gl->gl_lru, &skipped);
+		set_bit(GLF_LRU, &gl->gl_flags);
 	}
 	list_splice(&skipped, &lru_list);
 	atomic_add(nr_skipped, &lru_count);
@@ -1461,12 +1459,7 @@ static void thaw_glock(struct gfs2_glock *gl)
 
 static void clear_glock(struct gfs2_glock *gl)
 {
-	spin_lock(&lru_lock);
-	if (!list_empty(&gl->gl_lru)) {
-		list_del_init(&gl->gl_lru);
-		atomic_dec(&lru_count);
-	}
-	spin_unlock(&lru_lock);
+	gfs2_glock_remove_from_lru(gl);
 
 	spin_lock(&gl->gl_spin);
 	if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1601,9 +1594,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
 	return 0;
 }
 
-static const char *gflags2str(char *buf, const unsigned long *gflags)
+static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 {
+	const unsigned long *gflags = &gl->gl_flags;
 	char *p = buf;
+
 	if (test_bit(GLF_LOCK, gflags))
 		*p++ = 'l';
 	if (test_bit(GLF_DEMOTE, gflags))
@@ -1626,6 +1621,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
 		*p++ = 'F';
 	if (test_bit(GLF_QUEUED, gflags))
 		*p++ = 'q';
+	if (test_bit(GLF_LRU, gflags))
+		*p++ = 'L';
+	if (gl->gl_object)
+		*p++ = 'o';
 	*p = 0;
 	return buf;
 }
@@ -1660,14 +1659,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 	dtime *= 1000000/HZ; /* demote time in uSec */
 	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
 		dtime = 0;
-	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
+	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
 		  state2str(gl->gl_state),
 		  gl->gl_name.ln_type,
 		  (unsigned long long)gl->gl_name.ln_number,
-		  gflags2str(gflags_buf, &gl->gl_flags),
+		  gflags2str(gflags_buf, gl),
 		  state2str(gl->gl_target),
 		  state2str(gl->gl_demote_state), dtime,
 		  atomic_read(&gl->gl_ail_count),
+		  atomic_read(&gl->gl_revokes),
 		  atomic_read(&gl->gl_ref));
 
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
 
 extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
 extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
 extern void gfs2_glock_free(struct gfs2_glock *gl);
 
 extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3754e3cbf02b..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
 #include "trans.h"
 
 /**
- * ail_empty_gl - remove all buffers for a given lock from the AIL
+ * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
  * @gl: the glock
  *
  * None of the buffers should be dirty, locked, or pinned.
  */
 
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static void __gfs2_ail_flush(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct list_head *head = &gl->gl_ail_list;
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
-	struct gfs2_trans tr;
-
-	memset(&tr, 0, sizeof(tr));
-	tr.tr_revokes = atomic_read(&gl->gl_ail_count);
-
-	if (!tr.tr_revokes)
-		return;
-
-	/* A shortened, inline version of gfs2_trans_begin() */
-	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
-	tr.tr_ip = (unsigned long)__builtin_return_address(0);
-	INIT_LIST_HEAD(&tr.tr_list_buf);
-	gfs2_log_reserve(sdp, tr.tr_reserved);
-	BUG_ON(current->journal_info);
-	current->journal_info = &tr;
 
 	spin_lock(&sdp->sd_ail_lock);
 	while (!list_empty(head)) {
@@ -76,11 +61,51 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 	}
 	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
 	spin_unlock(&sdp->sd_ail_lock);
+}
+
+
+static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_trans tr;
+
+	memset(&tr, 0, sizeof(tr));
+	tr.tr_revokes = atomic_read(&gl->gl_ail_count);
+
+	if (!tr.tr_revokes)
+		return;
+
+	/* A shortened, inline version of gfs2_trans_begin() */
+	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+	tr.tr_ip = (unsigned long)__builtin_return_address(0);
+	INIT_LIST_HEAD(&tr.tr_list_buf);
+	gfs2_log_reserve(sdp, tr.tr_reserved);
+	BUG_ON(current->journal_info);
+	current->journal_info = &tr;
+
+	__gfs2_ail_flush(gl);
 
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
 }
 
+void gfs2_ail_flush(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	unsigned int revokes = atomic_read(&gl->gl_ail_count);
+	int ret;
+
+	if (!revokes)
+		return;
+
+	ret = gfs2_trans_begin(sdp, 0, revokes);
+	if (ret)
+		return;
+	__gfs2_ail_flush(gl);
+	gfs2_trans_end(sdp);
+	gfs2_log_flush(sdp, NULL);
+}
+
 /**
  * rgrp_go_sync - sync out the metadata for this glock
  * @gl: the glock
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
 }
 
 /**
+ * gfs2_set_nlink - Set the inode's link count based on on-disk info
+ * @inode: The inode in question
+ * @nlink: The link count
+ *
+ * If the link count has hit zero, it must never be raised, whatever the
+ * on-disk inode might say. When new struct inodes are created the link
+ * count is set to 1, so that we can safely use this test even when reading
+ * in on-disk information for the first time.
+ */
+
+static void gfs2_set_nlink(struct inode *inode, u32 nlink)
+{
+	/*
+	 * We will need to review setting the nlink count here in the
+	 * light of the forthcoming ro bind mount work. This is a reminder
+	 * to do that.
+	 */
+	if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
+		if (nlink == 0)
+			clear_nlink(inode);
+		else
+			inode->i_nlink = nlink;
+	}
+}
+
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{
+	const struct gfs2_dinode *str = buf;
+	struct timespec atime;
+	u16 height, depth;
+
+	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
+		goto corrupt;
+	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
+	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
+	ip->i_inode.i_rdev = 0;
+	switch (ip->i_inode.i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+					   be32_to_cpu(str->di_minor));
+		break;
+	};
+
+	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
+	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
+	gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
+	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
+	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
+	atime.tv_sec = be64_to_cpu(str->di_atime);
+	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+	if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
+		ip->i_inode.i_atime = atime;
+	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
+	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+
+	ip->i_goal = be64_to_cpu(str->di_goal_meta);
+	ip->i_generation = be64_to_cpu(str->di_generation);
+
+	ip->i_diskflags = be32_to_cpu(str->di_flags);
+	gfs2_set_inode_flags(&ip->i_inode);
+	height = be16_to_cpu(str->di_height);
+	if (unlikely(height > GFS2_MAX_META_HEIGHT))
+		goto corrupt;
+	ip->i_height = (u8)height;
+
+	depth = be16_to_cpu(str->di_depth);
+	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
+		goto corrupt;
+	ip->i_depth = (u8)depth;
+	ip->i_entries = be32_to_cpu(str->di_entries);
+
+	ip->i_eattr = be64_to_cpu(str->di_eattr);
+	if (S_ISREG(ip->i_inode.i_mode))
+		gfs2_set_aops(&ip->i_inode);
+
+	return 0;
+corrupt:
+	gfs2_consist_inode(ip);
+	return -EIO;
+}
+
+/**
+ * gfs2_inode_refresh - Refresh the incore copy of the dinode
+ * @ip: The GFS2 inode
+ *
+ * Returns: errno
+ */
+
+int gfs2_inode_refresh(struct gfs2_inode *ip)
+{
+	struct buffer_head *dibh;
+	int error;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
+		brelse(dibh);
+		return -EIO;
+	}
+
+	error = gfs2_dinode_in(ip, dibh->b_data);
+	brelse(dibh);
+	clear_bit(GIF_INVALID, &ip->i_flags);
+
+	return error;
+}
+
+/**
  * inode_go_lock - operation done after an inode lock is locked by a process
  * @gl: the glock
  * @flags:
@@ -385,6 +523,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
 static void iopen_go_callback(struct gfs2_glock *gl)
 {
 	struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+
+	if (sdp->sd_vfs->s_flags & MS_RDONLY)
+		return;
 
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
 	    gl->gl_state == LM_ST_SHARED && ip) {
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
 extern const struct gfs2_glock_operations gfs2_journal_glops;
 extern const struct gfs2_glock_operations *gfs2_glops_list[];
 
+extern void gfs2_ail_flush(struct gfs2_glock *gl);
+
 #endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
-#define DIO_ALL		0x00000100
 
 struct gfs2_log_operations;
 struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
 	GLF_INITIAL			= 10,
 	GLF_FROZEN			= 11,
 	GLF_QUEUED			= 12,
+	GLF_LRU				= 13,
+	GLF_OBJECT			= 14, /* Used only for tracing */
 };
 
 struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
 
 	struct list_head gl_ail_list;
 	atomic_t gl_ail_count;
+	atomic_t gl_revokes;
 	struct delayed_work gl_work;
 	struct work_struct gl_delete;
 	struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
 	unsigned int ai_first;
 	struct list_head ai_ail1_list;
 	struct list_head ai_ail2_list;
-
-	u64 ai_sync_gen;
 };
 
 struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
 
 	char sb_lockproto[GFS2_LOCKNAME_LEN];
 	char sb_locktable[GFS2_LOCKNAME_LEN];
-	u8 sb_uuid[16];
 };
 
 /*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
 	spinlock_t sd_ail_lock;
 	struct list_head sd_ail1_list;
 	struct list_head sd_ail2_list;
-	u64 sd_ail_sync_gen;
 
 	/* Replay stuff */
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d54a28776a..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
  * of the GNU General Public License version 2.
  */
 
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/xattr.h>
 #include <linux/posix_acl.h>
-#include <linux/sort.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/fiemap.h>
 #include <linux/security.h>
-#include <linux/time.h>
+#include <asm/uaccess.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -26,51 +28,70 @@
 #include "dir.h"
 #include "xattr.h"
 #include "glock.h"
-#include "glops.h"
 #include "inode.h"
-#include "log.h"
 #include "meta_io.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "super.h"
+#include "glops.h"
 
-struct gfs2_inum_range_host {
-	u64 ir_start;
-	u64 ir_length;
+struct gfs2_skip_data {
+	u64 no_addr;
+	int skipped;
+	int non_block;
 };
 
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	u64 *no_addr = opaque;
+	struct gfs2_skip_data *data = opaque;
 
-	if (ip->i_no_addr == *no_addr)
+	if (ip->i_no_addr == data->no_addr) {
+		if (data->non_block &&
+		    inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+			data->skipped = 1;
+			return 0;
+		}
 		return 1;
-
+	}
 	return 0;
 }
 
 static int iget_set(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	u64 *no_addr = opaque;
+	struct gfs2_skip_data *data = opaque;
 
-	inode->i_ino = (unsigned long)*no_addr;
-	ip->i_no_addr = *no_addr;
+	if (data->skipped)
+		return -ENOENT;
+	inode->i_ino = (unsigned long)(data->no_addr);
+	ip->i_no_addr = data->no_addr;
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
 {
 	unsigned long hash = (unsigned long)no_addr;
-	return ilookup5(sb, hash, iget_test, &no_addr);
+	struct gfs2_skip_data data;
+
+	data.no_addr = no_addr;
+	data.skipped = 0;
+	data.non_block = non_block;
+	return ilookup5(sb, hash, iget_test, &data);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
+			       int non_block)
 {
+	struct gfs2_skip_data data;
 	unsigned long hash = (unsigned long)no_addr;
-	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
+
+	data.no_addr = no_addr;
+	data.skipped = 0;
+	data.non_block = non_block;
+	return iget5_locked(sb, hash, iget_test, iget_set, &data);
 }
 
 /**
@@ -111,19 +132,20 @@ static void gfs2_set_iop(struct inode *inode)
  * @sb: The super block
  * @no_addr: The inode number
  * @type: The type of the inode
+ * @non_block: If non-zero, do not block on inodes that are being freed
  *
  * Returns: A VFS inode, or an error
  */
 
 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
-				u64 no_addr, u64 no_formal_ino)
+				u64 no_addr, u64 no_formal_ino, int non_block)
 {
 	struct inode *inode;
 	struct gfs2_inode *ip;
 	struct gfs2_glock *io_gl = NULL;
 	int error;
 
-	inode = gfs2_iget(sb, no_addr);
+	inode = gfs2_iget(sb, no_addr, non_block);
 	ip = GFS2_I(inode);
 
 	if (!inode)
@@ -185,11 +207,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder i_gh;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	int error;
 
+	/* Must not read in block until block type is verified */
 	error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
-				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+				  LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
 	if (error)
 		return ERR_PTR(error);
 
@@ -197,7 +220,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 	if (error)
 		goto fail;
 
-	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
 	if (IS_ERR(inode))
 		goto fail;
 
@@ -222,203 +245,6 @@ fail_iput:
 	goto fail;
 }
 
-static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
-{
-	const struct gfs2_dinode *str = buf;
-	struct timespec atime;
-	u16 height, depth;
-
-	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
-		goto corrupt;
-	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
-	ip->i_inode.i_rdev = 0;
-	switch (ip->i_inode.i_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
-					   be32_to_cpu(str->di_minor));
-		break;
-	};
-
-	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
-	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
-	/*
-	 * We will need to review setting the nlink count here in the
-	 * light of the forthcoming ro bind mount work. This is a reminder
-	 * to do that.
-	 */
-	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
-	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
-	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
-	atime.tv_sec = be64_to_cpu(str->di_atime);
-	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
-	if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
-		ip->i_inode.i_atime = atime;
-	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
-
-	ip->i_goal = be64_to_cpu(str->di_goal_meta);
-	ip->i_generation = be64_to_cpu(str->di_generation);
-
-	ip->i_diskflags = be32_to_cpu(str->di_flags);
-	gfs2_set_inode_flags(&ip->i_inode);
-	height = be16_to_cpu(str->di_height);
-	if (unlikely(height > GFS2_MAX_META_HEIGHT))
-		goto corrupt;
-	ip->i_height = (u8)height;
-
-	depth = be16_to_cpu(str->di_depth);
-	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
-		goto corrupt;
-	ip->i_depth = (u8)depth;
-	ip->i_entries = be32_to_cpu(str->di_entries);
-
-	ip->i_eattr = be64_to_cpu(str->di_eattr);
-	if (S_ISREG(ip->i_inode.i_mode))
-		gfs2_set_aops(&ip->i_inode);
-
-	return 0;
-corrupt:
-	if (gfs2_consist_inode(ip))
-		gfs2_dinode_print(ip);
-	return -EIO;
-}
-
-/**
- * gfs2_inode_refresh - Refresh the incore copy of the dinode
- * @ip: The GFS2 inode
- *
- * Returns: errno
- */
-
-int gfs2_inode_refresh(struct gfs2_inode *ip)
-{
-	struct buffer_head *dibh;
-	int error;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
-
-	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
-		brelse(dibh);
-		return -EIO;
-	}
-
-	error = gfs2_dinode_in(ip, dibh->b_data);
-	brelse(dibh);
-	clear_bit(GIF_INVALID, &ip->i_flags);
-
-	return error;
-}
-
-int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_alloc *al;
-	struct gfs2_rgrpd *rgd;
-	int error;
-
-	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-
-	al = gfs2_alloc_get(ip);
-	if (!al)
-		return -ENOMEM;
-
-	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-	if (error)
-		goto out;
-
-	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
-	if (error)
-		goto out_qs;
-
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-	if (!rgd) {
-		gfs2_consist_inode(ip);
-		error = -EIO;
-		goto out_rindex_relse;
-	}
-
-	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
-				   &al->al_rgd_gh);
-	if (error)
-		goto out_rindex_relse;
-
-	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
-	if (error)
-		goto out_rg_gunlock;
-
-	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
-	set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
-
-	gfs2_free_di(rgd, ip);
-
-	gfs2_trans_end(sdp);
-
-out_rg_gunlock:
-	gfs2_glock_dq_uninit(&al->al_rgd_gh);
-out_rindex_relse:
-	gfs2_glock_dq_uninit(&al->al_ri_gh);
-out_qs:
-	gfs2_quota_unhold(ip);
-out:
-	gfs2_alloc_put(ip);
-	return error;
-}
-
-/**
- * gfs2_change_nlink - Change nlink count on inode
- * @ip: The GFS2 inode
- * @diff: The change in the nlink count required
- *
- * Returns: errno
- */
-int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
-{
-	struct buffer_head *dibh;
-	u32 nlink;
-	int error;
-
-	BUG_ON(diff != 1 && diff != -1);
-	nlink = ip->i_inode.i_nlink + diff;
-
-	/* If we are reducing the nlink count, but the new value ends up being
-	   bigger than the old one, we must have underflowed. */
-	if (diff < 0 && nlink > ip->i_inode.i_nlink) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
-
-	if (diff > 0)
-		inc_nlink(&ip->i_inode);
-	else
-		drop_nlink(&ip->i_inode);
-
-	ip->i_inode.i_ctime = CURRENT_TIME;
-
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
-	mark_inode_dirty(&ip->i_inode);
-
-	if (ip->i_inode.i_nlink == 0)
-		gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
-
-	return error;
-}
 
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
 {
@@ -517,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 
 	/*  Don't create entries in an unlinked directory  */
 	if (!dip->i_inode.i_nlink)
-		return -EPERM;
+		return -ENOENT;
 
 	error = gfs2_dir_check(&dip->i_inode, name, NULL);
 	switch (error) {
@@ -587,21 +413,44 @@ out:
 	return error;
 }
 
+static void gfs2_init_dir(struct buffer_head *dibh,
+			  const struct gfs2_inode *parent)
+{
+	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+	struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
+
+	gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
+	dent->de_inum = di->di_num; /* already GFS2 endian */
+	dent->de_type = cpu_to_be16(DT_DIR);
+
+	dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
+	gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
+	gfs2_inum_out(parent, dent);
+	dent->de_type = cpu_to_be16(DT_DIR);
+	
+}
+
 /**
  * init_dinode - Fill in a new dinode structure
- * @dip: the directory this inode is being created in
+ * @dip: The directory this inode is being created in
  * @gl: The glock covering the new inode
- * @inum: the inode number
- * @mode: the file permissions
- * @uid:
- * @gid:
+ * @inum: The inode number
+ * @mode: The file permissions
+ * @uid: The uid of the new inode
+ * @gid: The gid of the new inode
+ * @generation: The generation number of the new inode
+ * @dev: The device number (if a device node)
+ * @symname: The symlink destination (if a symlink)
+ * @size: The inode size (ignored for directories)
+ * @bhp: The buffer head (returned to caller)
  *
  */
 
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
-			const u64 *generation, dev_t dev, struct buffer_head **bhp)
+			const u64 *generation, dev_t dev, const char *symname,
+			unsigned size, struct buffer_head **bhp)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
@@ -620,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_uid = cpu_to_be32(uid);
 	di->di_gid = cpu_to_be32(gid);
 	di->di_nlink = 0;
-	di->di_size = 0;
+	di->di_size = cpu_to_be64(size);
 	di->di_blocks = cpu_to_be64(1);
 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
 	di->di_major = cpu_to_be32(MAJOR(dev));
@@ -628,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
 	di->di_generation = cpu_to_be64(*generation);
 	di->di_flags = 0;
-
-	if (S_ISREG(mode)) {
-		if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
-		    gfs2_tune_get(sdp, gt_new_files_jdata))
-			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
-	} else if (S_ISDIR(mode)) {
-		di->di_flags |= cpu_to_be32(dip->i_diskflags &
-					    GFS2_DIF_INHERIT_JDATA);
-	}
-
 	di->__pad1 = 0;
 	di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
 	di->di_height = 0;
@@ -651,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
 	di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
-	
+
+	switch(mode & S_IFMT) {	
+	case S_IFREG:
+		if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
+		    gfs2_tune_get(sdp, gt_new_files_jdata))
+			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+		break;
+	case S_IFDIR:
+		di->di_flags |= cpu_to_be32(dip->i_diskflags &
+					    GFS2_DIF_INHERIT_JDATA);
+		di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+		di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
+		di->di_entries = cpu_to_be32(2);
+		gfs2_init_dir(dibh, dip);
+		break;
+	case S_IFLNK:
+		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
+		break;
+	}
+
 	set_buffer_uptodate(dibh);
 
 	*bhp = dibh;
@@ -659,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 		       unsigned int mode, const struct gfs2_inum_host *inum,
-		       const u64 *generation, dev_t dev, struct buffer_head **bhp)
+		       const u64 *generation, dev_t dev, const char *symname,
+		       unsigned int size, struct buffer_head **bhp)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	unsigned int uid, gid;
@@ -681,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	if (error)
 		goto out_quota;
 
-	init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
+	init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
 	gfs2_quota_change(dip, +1, uid, gid);
 	gfs2_trans_end(sdp);
 
@@ -735,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 	}
 
-	error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
+	error = gfs2_dir_add(&dip->i_inode, name, ip);
 	if (error)
 		goto fail_end_trans;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
 		goto fail_end_trans;
-	ip->i_inode.i_nlink = 1;
+	inc_nlink(&ip->i_inode);
+	if (S_ISDIR(ip->i_inode.i_mode))
+		inc_nlink(&ip->i_inode);
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -789,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
 }
 
 /**
- * gfs2_createi - Create a new inode
- * @ghs: An array of two holders
- * @name: The name of the new file
- * @mode: the permissions on the new inode
+ * gfs2_create_inode - Create a new inode
+ * @dir: The parent directory
+ * @dentry: The new dentry
+ * @mode: The permissions on the new inode
+ * @dev: For device nodes, this is the device number
+ * @symname: For symlinks, this is the link destination
+ * @size: The initial size of the inode (ignored for directories)
  *
- * @ghs[0] is an initialized holder for the directory
- * @ghs[1] is the holder for the inode lock
- *
- * If the return value is not NULL, the glocks on both the directory and the new
- * file are held.  A transaction has been started and an inplace reservation
- * is held, as well.
- *
- * Returns: An inode
+ * Returns: 0 on success, or error code
  */
 
-struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode, dev_t dev)
+static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
+			     unsigned int mode, dev_t dev, const char *symname,
+			     unsigned int size)
 {
+	const struct qstr *name = &dentry->d_name;
+	struct gfs2_holder ghs[2];
 	struct inode *inode = NULL;
-	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
-	struct inode *dir = &dip->i_inode;
+	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
 	int error;
@@ -817,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	struct buffer_head *bh = NULL;
 
 	if (!name->len || name->len > GFS2_FNAMESIZE)
-		return ERR_PTR(-ENAMETOOLONG);
+		return -ENAMETOOLONG;
 
-	gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
-	error = gfs2_glock_nq(ghs);
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (error)
 		goto fail;
 
@@ -838,12 +696,12 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock;
 
-	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
+	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
 	if (error)
 		goto fail_gunlock2;
 
 	inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
-				  inum.no_formal_ino);
+				  inum.no_formal_ino, 0);
 	if (IS_ERR(inode))
 		goto fail_gunlock2;
 
@@ -865,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 
 	if (bh)
 		brelse(bh);
-	return inode;
+
+	gfs2_trans_end(sdp);
+	if (dip->i_alloc->al_rgd)
+		gfs2_inplace_release(dip);
+	gfs2_quota_unlock(dip);
+	gfs2_alloc_put(dip);
+	gfs2_glock_dq_uninit_m(2, ghs);
+	mark_inode_dirty(inode);
+	d_instantiate(dentry, inode);
+	return 0;
 
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
 	if (inode && !IS_ERR(inode))
 		iput(inode);
 fail_gunlock:
-	gfs2_glock_dq(ghs);
+	gfs2_glock_dq_uninit(ghs);
 fail:
 	if (bh)
 		brelse(bh);
-	return ERR_PTR(error);
+	return error;
+}
+
+/**
+ * gfs2_create - Create a file
+ * @dir: The directory in which to create the file
+ * @dentry: The dentry of the new file
+ * @mode: The mode of the new file
+ * @nd: Lookup data (may be NULL); LOOKUP_EXCL in its flags makes -EEXIST fatal
+ * Returns: errno
+ */
+
+static int gfs2_create(struct inode *dir, struct dentry *dentry,
+		       int mode, struct nameidata *nd)
+{
+	struct inode *inode;
+	int ret;
+
+	for (;;) {
+		ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
+		if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
+			return ret;
+		/* Name exists: use the existing inode, or retry if lookup finds none */
+		inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+		if (inode) {
+			if (!IS_ERR(inode))
+				break;
+			return PTR_ERR(inode);
+		}
+	}
+
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/**
+ * gfs2_lookup - Look up a filename in a directory and return its inode
+ * @dir: The directory inode
+ * @dentry: The dentry of the new inode
+ * @nd: passed from Linux VFS, ignored by us
+ *
+ * Called by the VFS layer. Calls gfs2_lookupi(); a found inode has its glock
+ * acquired (shared) and released again before it is spliced into the dcache.
+ * Returns: NULL or the d_splice_alias() result, or an ERR_PTR() on failure
+ */
+
+static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
+				  struct nameidata *nd)
+{
+	struct inode *inode = NULL;
+
+	inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+	if (inode && IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	if (inode) {
+		struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
+		struct gfs2_holder gh;
+		int error;
+		error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error) {
+			iput(inode);
+			return ERR_PTR(error);
+		}
+		gfs2_glock_dq_uninit(&gh);
+		return d_splice_alias(inode, dentry);
+	}
+	d_add(dentry, inode);
+
+	return NULL;
+}
+
+/**
+ * gfs2_link - Link to a file
+ * @old_dentry: The inode to link
+ * @dir: Add link to this directory
+ * @dentry: The name of the link
+ *
+ * Link the inode in "old_dentry" into the directory "dir" with the
+ * name in "dentry".
+ *
+ * Returns: errno
+ */
+
+static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
+		     struct dentry *dentry)
+{
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_sbd *sdp = GFS2_SB(dir);
+	struct inode *inode = old_dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder ghs[2];
+	struct buffer_head *dibh;
+	int alloc_required;
+	int error;
+
+	if (S_ISDIR(inode->i_mode))
+		return -EPERM;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+	error = gfs2_glock_nq(ghs); /* parent */
+	if (error)
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
+
+	error = -ENOENT;
+	if (inode->i_nlink == 0)
+		goto out_gunlock;
+
+	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_dir_check(dir, &dentry->d_name, NULL);
+	switch (error) {
+	case -ENOENT:
+		break;
+	case 0:
+		error = -EEXIST;
+	default:
+		goto out_gunlock;
+	}
+
+	error = -EINVAL;
+	if (!dip->i_inode.i_nlink)
+		goto out_gunlock;
+	error = -EFBIG;
+	if (dip->i_entries == (u32)-1)
+		goto out_gunlock;
+	error = -EPERM;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		goto out_gunlock;
+	error = -EINVAL;
+	if (!ip->i_inode.i_nlink)
+		goto out_gunlock;
+	error = -EMLINK;
+	if (ip->i_inode.i_nlink == (u32)-1)
+		goto out_gunlock;
+
+	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
+	if (error < 0)
+		goto out_gunlock;
+	error = 0;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(dip);
+		if (!al) {
+			error = -ENOMEM;
+			goto out_gunlock;
+		}
+
+		error = gfs2_quota_lock_check(dip);
+		if (error)
+			goto out_alloc;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve(dip);
+		if (error)
+			goto out_gunlock_q;
+
+		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+					 gfs2_rg_blocks(al) +
+					 2 * RES_DINODE + RES_STATFS +
+					 RES_QUOTA, 0);
+		if (error)
+			goto out_ipres;
+	} else {
+		error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
+		if (error)
+			goto out_ipres;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_dir_add(dir, &dentry->d_name, ip);
+	if (error)
+		goto out_brelse;
+
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	inc_nlink(&ip->i_inode);
+	ip->i_inode.i_ctime = CURRENT_TIME;
+	gfs2_dinode_out(ip, dibh->b_data);
+	mark_inode_dirty(&ip->i_inode);
+
+out_brelse:
+	brelse(dibh);
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_ipres:
+	if (alloc_required)
+		gfs2_inplace_release(dip);
+out_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(dip);
+out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(dip);
+out_gunlock:
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_glock_dq(ghs);
+out_parent:
+	gfs2_holder_uninit(ghs);
+	gfs2_holder_uninit(ghs + 1);
+	if (!error) {
+		ihold(inode);
+		d_instantiate(dentry, inode);
+		mark_inode_dirty(inode);
+	}
+	return error;
+}
+
+/*
+ * gfs2_unlink_ok - check to see that a inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+			  const struct gfs2_inode *ip)
+{
+	int error;
+
+	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
+		return -EPERM;
+
+	if ((dip->i_inode.i_mode & S_ISVTX) &&
+	    dip->i_inode.i_uid != current_fsuid() &&
+	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (IS_APPEND(&dip->i_inode))
+		return -EPERM;
+
+	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+	if (error)
+		return error;
+
+	error = gfs2_dir_check(&dip->i_inode, name, ip);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+/**
+ * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
+ * @dip: The parent directory
+ * @dentry: The dentry of the entry to remove; its d_inode is the inode
+ *          being unlinked
+ * @bh: The inode buffer for the inode to be removed
+ *
+ * Called with all the locks and in a transaction. This will only be
+ * called for a directory after it has been checked to ensure it is empty.
+ *
+ * Returns: 0 on success, or an error
+ */
+
+static int gfs2_unlink_inode(struct gfs2_inode *dip,
+			     const struct dentry *dentry,
+			     struct buffer_head *bh)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	int error;
+
+	error = gfs2_dir_del(dip, dentry);
+	if (error)
+		return error;
+
+	ip->i_entries = 0;
+	inode->i_ctime = CURRENT_TIME;
+	if (S_ISDIR(inode->i_mode))
+		clear_nlink(inode);
+	else
+		drop_nlink(inode);
+	gfs2_trans_add_bh(ip->i_gl, bh, 1);
+	gfs2_dinode_out(ip, bh->b_data);
+	mark_inode_dirty(inode);
+	if (inode->i_nlink == 0)
+		gfs2_unlink_di(inode);
+	return 0;
+}
+
+
+/**
+ * gfs2_unlink - Unlink an inode (this does rmdir as well)
+ * @dir: The inode of the directory containing the inode to unlink
+ * @dentry: The file itself
+ *
+ * This routine uses the type of the inode as a flag to figure out
+ * whether this is an unlink or an rmdir.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_sbd *sdp = GFS2_SB(dir);
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct buffer_head *bh;
+	struct gfs2_holder ghs[3];
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_holder ri_gh;
+	int error;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
+
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+	error = gfs2_glock_nq(ghs); /* parent */
+	if (error)
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
+
+	error = -ENOENT;
+	if (inode->i_nlink == 0)
+		goto out_rgrp;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = -ENOTEMPTY;
+		if (ip->i_entries > 2 || inode->i_nlink > 2)
+			goto out_rgrp;
+	}
+
+	error = gfs2_glock_nq(ghs + 2); /* rgrp */
+	if (error)
+		goto out_rgrp;
+
+	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
+	if (error)
+		goto out_gunlock;
+
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_unlink_inode(dip, dentry, bh);
+	brelse(bh);
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_gunlock:
+	gfs2_glock_dq(ghs + 2);
+out_rgrp:
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_glock_dq(ghs);
+out_parent:
+	/* All three holders were initialised above; uninit them on every path */
+	gfs2_holder_uninit(ghs + 2);
+	gfs2_holder_uninit(ghs + 1);
+	gfs2_holder_uninit(ghs);
+	gfs2_glock_dq_uninit(&ri_gh);
+	return error;
+}
+
+/**
+ * gfs2_symlink - Create a symlink
+ * @dir: The directory to create the symlink in
+ * @dentry: The dentry to put the symlink in
+ * @symname: The thing which the link points to
+ *
+ * Returns: errno
+ */
+
+static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(dir);
+	unsigned int size;
+
+	size = strlen(symname);
+	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
+		return -ENAMETOOLONG;
+
+	return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
+}
+
+/**
+ * gfs2_mkdir - Make a directory
+ * @dir: The parent directory of the new one
+ * @dentry: The dentry of the new directory
+ * @mode: The mode of the new directory
+ *
+ * Returns: errno
+ */
+
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
+}
+
+/**
+ * gfs2_mknod - Make a special file
+ * @dir: The directory in which the special file will reside
+ * @dentry: The dentry of the special file
+ * @mode: The mode of the special file
+ * @dev: The device specification of the special file
+ * Returns: 0 on success, or error code (the gfs2_create_inode() result)
+ */
+
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+		      dev_t dev)
+{
+	return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
+}
+
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+	struct inode *dir = &to->i_inode;
+	struct super_block *sb = dir->i_sb;
+	struct inode *tmp;
+	int error = 0;
+
+	igrab(dir);
+
+	for (;;) {
+		if (dir == &this->i_inode) {
+			error = -EINVAL;
+			break;
+		}
+		if (dir == sb->s_root->d_inode) {
+			error = 0;
+			break;
+		}
+		/* gfs2_lookupi() can return NULL (see gfs2_lookup()) or an ERR_PTR */
+		tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
+		if (IS_ERR_OR_NULL(tmp)) {
+			error = tmp ? PTR_ERR(tmp) : -ENOENT;
+			break;
+		}
+
+		iput(dir);
+		dir = tmp;
+	}
+
+	iput(dir);
+
+	return error;
+}
+
+/**
+ * gfs2_rename - Rename a file
+ * @odir: Parent directory of old file name
+ * @odentry: The old dentry of the file
+ * @ndir: Parent directory of new file name
+ * @ndentry: The new dentry of the file
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rename(struct inode *odir, struct dentry *odentry,
+		       struct inode *ndir, struct dentry *ndentry)
+{
+	struct gfs2_inode *odip = GFS2_I(odir);
+	struct gfs2_inode *ndip = GFS2_I(ndir);
+	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
+	struct gfs2_inode *nip = NULL;
+	struct gfs2_sbd *sdp = GFS2_SB(odir);
+	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+	struct gfs2_rgrpd *nrgd;
+	unsigned int num_gh, x;
+	int dir_rename = 0;
+	int alloc_required = 0;
+	int error;
+
+	if (ndentry->d_inode) {
+		nip = GFS2_I(ndentry->d_inode);
+		if (ip == nip)
+			return 0;
+	}
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		return error;
+
+	if (odip != ndip) {
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+					   0, &r_gh);
+		if (error)
+			goto out;
+
+		if (S_ISDIR(ip->i_inode.i_mode)) {
+			dir_rename = 1;
+			/* don't move a directory into its own subdirectory */
+			error = gfs2_ok_to_move(ip, ndip);
+			if (error)
+				goto out_gunlock_r;
+		}
+	}
+
+	num_gh = 1;
+	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	if (odip != ndip) {
+		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+		num_gh++;
+	}
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+	num_gh++;
+
+	if (nip) {
+		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+		num_gh++;
+		/* grab the resource lock for unlink flag twiddling
+		 * this is the case of the target file already existing
+		 * so we unlink before doing the rename
+		 */
+		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
+		if (nrgd)
+			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
+	}
+
+	for (x = 0; x < num_gh; x++) {
+		error = gfs2_glock_nq(ghs + x);
+		if (error)
+			goto out_gunlock;
+	}
+
+	error = -ENOENT;
+	if (ip->i_inode.i_nlink == 0)
+		goto out_gunlock;
+
+	/* Check out the old directory */
+
+	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
+	if (error)
+		goto out_gunlock;
+
+	/* Check out the new directory */
+
+	if (nip) {
+		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
+		if (error)
+			goto out_gunlock;
+
+		if (nip->i_inode.i_nlink == 0) {
+			error = -EAGAIN;
+			goto out_gunlock;
+		}
+
+		if (S_ISDIR(nip->i_inode.i_mode)) {
+			if (nip->i_entries < 2) {
+				gfs2_consist_inode(nip);
+				error = -EIO;
+				goto out_gunlock;
+			}
+			if (nip->i_entries > 2) {
+				error = -ENOTEMPTY;
+				goto out_gunlock;
+			}
+		}
+	} else {
+		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
+		if (error)
+			goto out_gunlock;
+
+		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
+		switch (error) {
+		case -ENOENT:
+			error = 0;
+			break;
+		case 0:
+			error = -EEXIST;
+		default:
+			goto out_gunlock;
+		};
+
+		if (odip != ndip) {
+			if (!ndip->i_inode.i_nlink) {
+				error = -ENOENT;
+				goto out_gunlock;
+			}
+			if (ndip->i_entries == (u32)-1) {
+				error = -EFBIG;
+				goto out_gunlock;
+			}
+			if (S_ISDIR(ip->i_inode.i_mode) &&
+			    ndip->i_inode.i_nlink == (u32)-1) {
+				error = -EMLINK;
+				goto out_gunlock;
+			}
+		}
+	}
+
+	/* Check out the dir to be renamed */
+
+	if (dir_rename) {
+		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	if (nip == NULL)
+		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+	error = alloc_required;
+	if (error < 0)
+		goto out_gunlock;
+	error = 0;
+
+	if (alloc_required) {
+		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
+		if (!al) {
+			error = -ENOMEM;
+			goto out_gunlock;
+		}
+
+		error = gfs2_quota_lock_check(ndip);
+		if (error)
+			goto out_alloc;
+
+		al->al_requested = sdp->sd_max_dirres;
+
+		error = gfs2_inplace_reserve_ri(ndip);
+		if (error)
+			goto out_gunlock_q;
+
+		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+					 gfs2_rg_blocks(al) +
+					 4 * RES_DINODE + 4 * RES_LEAF +
+					 RES_STATFS + RES_QUOTA + 4, 0);
+		if (error)
+			goto out_ipreserv;
+	} else {
+		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
+					 5 * RES_LEAF + 4, 0);
+		if (error)
+			goto out_gunlock;
+	}
+
+	/* Remove the target file, if it exists.  NOTE(review): the result of
+	 * gfs2_unlink_inode() is overwritten below without being checked. */
+	if (nip) {
+		struct buffer_head *bh;
+		error = gfs2_meta_inode_buffer(nip, &bh);
+		if (error)
+			goto out_end_trans;
+		error = gfs2_unlink_inode(ndip, ndentry, bh);
+		brelse(bh);
+	}
+
+	if (dir_rename) {
+		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+		if (error)
+			goto out_end_trans;
+	} else {
+		struct buffer_head *dibh;
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error)
+			goto out_end_trans;
+		ip->i_inode.i_ctime = CURRENT_TIME;
+		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+		gfs2_dinode_out(ip, dibh->b_data);
+		brelse(dibh);
+	}
+
+	error = gfs2_dir_del(odip, odentry);
+	if (error)
+		goto out_end_trans;
+
+	error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
+	if (error)
+		goto out_end_trans;
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_ipreserv:
+	if (alloc_required)
+		gfs2_inplace_release(ndip);
+out_gunlock_q:
+	if (alloc_required)
+		gfs2_quota_unlock(ndip);
+out_alloc:
+	if (alloc_required)
+		gfs2_alloc_put(ndip);
+out_gunlock:
+	/* Only the first x holders were enqueued, but all num_gh were initialised */
+	while (x--)
+		gfs2_glock_dq(ghs + x);
+	while (num_gh--)
+		gfs2_holder_uninit(ghs + num_gh);
+out_gunlock_r:
+	if (r_gh.gh_gl)
+		gfs2_glock_dq_uninit(&r_gh);
+out:
+	gfs2_glock_dq_uninit(&ri_gh);
+	return error;
+}
+
+/**
+ * gfs2_follow_link - Follow a symbolic link
+ * @dentry: The dentry of the link
+ * @nd: Receives the link target (or an ERR_PTR) via nd_set_link()
+ *
+ * The target is copied from the dinode block into a kzalloc'd buffer of
+ * i_size + 1 bytes; gfs2_put_link() frees it.
+ * Returns: always NULL; failures are reported through nd_set_link()
+ */
+
+static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int size;
+	char *buf;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		nd_set_link(nd, ERR_PTR(error));
+		return NULL;
+	}
+
+	size = (unsigned int)i_size_read(&ip->i_inode);
+	if (size == 0) {
+		gfs2_consist_inode(ip);
+		buf = ERR_PTR(-EIO);
+		goto out;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error) {
+		buf = ERR_PTR(error);
+		goto out;
+	}
+
+	buf = kzalloc(size + 1, GFP_NOFS);
+	if (!buf)
+		buf = ERR_PTR(-ENOMEM);
+	else
+		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
+	brelse(dibh);
+out:
+	gfs2_glock_dq_uninit(&i_gh);
+	nd_set_link(nd, buf);
+	return NULL;
+}
+
+static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		kfree(s);
+}
+
+/**
+ * gfs2_permission -
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
+ *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done.
+ *
+ * Returns: errno
+ */
+
+int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
+{
+	struct gfs2_inode *ip;
+	struct gfs2_holder i_gh;
+	int error;
+	int unlock = 0;
+
+
+	ip = GFS2_I(inode);
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+		if (flags & IPERM_FLAG_RCU)
+			return -ECHILD;
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+		if (error)
+			return error;
+		unlock = 1;
+	}
+
+	if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
+		error = -EACCES;
+	else
+		error = generic_permission(inode, mask, flags, gfs2_check_acl);
+	if (unlock)
+		gfs2_glock_dq_uninit(&i_gh);
+
+	return error;
 }
 
 static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -902,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
  * @ip:
  * @attr:
  *
- * Called with a reference on the vnode.
- *
  * Returns: errno
  */
 
@@ -923,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	return error;
 }
 
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-	struct gfs2_dinode *str = buf;
-
-	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
-	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
-	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
-	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
-	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-
-	str->di_goal_meta = cpu_to_be64(ip->i_goal);
-	str->di_goal_data = cpu_to_be64(ip->i_goal);
-	str->di_generation = cpu_to_be64(ip->i_generation);
-
-	str->di_flags = cpu_to_be32(ip->i_diskflags);
-	str->di_height = cpu_to_be16(ip->i_height);
-	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
-					     GFS2_FORMAT_DE : 0);
-	str->di_depth = cpu_to_be16(ip->i_depth);
-	str->di_entries = cpu_to_be32(ip->i_entries);
-
-	str->di_eattr = cpu_to_be64(ip->i_eattr);
-	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
-	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
-	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
-}
-
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-	printk(KERN_INFO "  no_formal_ino = %llu\n",
-	       (unsigned long long)ip->i_no_formal_ino);
-	printk(KERN_INFO "  no_addr = %llu\n",
-	       (unsigned long long)ip->i_no_addr);
-	printk(KERN_INFO "  i_size = %llu\n",
-	       (unsigned long long)i_size_read(&ip->i_inode));
-	printk(KERN_INFO "  blocks = %llu\n",
-	       (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
-	printk(KERN_INFO "  i_goal = %llu\n",
-	       (unsigned long long)ip->i_goal);
-	printk(KERN_INFO "  i_diskflags = 0x%.8X\n", ip->i_diskflags);
-	printk(KERN_INFO "  i_height = %u\n", ip->i_height);
-	printk(KERN_INFO "  i_depth = %u\n", ip->i_depth);
-	printk(KERN_INFO "  i_entries = %u\n", ip->i_entries);
-	printk(KERN_INFO "  i_eattr = %llu\n",
-	       (unsigned long long)ip->i_eattr);
+static int setattr_chown(struct inode *inode, struct iattr *attr)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	u32 ouid, ogid, nuid, ngid;
+	int error;
+
+	ouid = inode->i_uid;
+	ogid = inode->i_gid;
+	nuid = attr->ia_uid;
+	ngid = attr->ia_gid;
+
+	if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
+		ouid = nuid = NO_QUOTA_CHANGE;
+	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
+		ogid = ngid = NO_QUOTA_CHANGE;
+
+	if (!gfs2_alloc_get(ip))
+		return -ENOMEM;
+
+	error = gfs2_quota_lock(ip, nuid, ngid);
+	if (error)
+		goto out_alloc;
+
+	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
+		error = gfs2_quota_check(ip, nuid, ngid);
+		if (error)
+			goto out_gunlock_q;
+	}
+
+	error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
+	if (error)
+		goto out_gunlock_q;
+
+	error = gfs2_setattr_simple(ip, attr);
+	if (error)
+		goto out_end_trans;
+
+	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
+		u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
+		gfs2_quota_change(ip, -blocks, ouid, ogid);
+		gfs2_quota_change(ip, blocks, nuid, ngid);
+	}
+
+out_end_trans:
+	gfs2_trans_end(sdp);
+out_gunlock_q:
+	gfs2_quota_unlock(ip);
+out_alloc:
+	gfs2_alloc_put(ip);
+	return error;
 }
 
+/**
+ * gfs2_setattr - Change attributes on an inode
+ * @dentry: The dentry which is changing
+ * @attr: The structure describing the change
+ *
+ * The VFS layer wants to change one or more of an inodes attributes.  Write
+ * that change out to disk.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder i_gh;
+	int error;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+	if (error)
+		return error;
+
+	error = -EPERM;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		goto out;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		goto out;
+
+	if (attr->ia_valid & ATTR_SIZE)
+		error = gfs2_setattr_size(inode, attr->ia_size);
+	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
+		error = setattr_chown(inode, attr);
+	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
+		error = gfs2_acl_chmod(ip, attr);
+	else
+		error = gfs2_setattr_simple(ip, attr);
+
+out:
+	gfs2_glock_dq_uninit(&i_gh);
+	if (!error)
+		mark_inode_dirty(inode);
+	return error;
+}
+
+/**
+ * gfs2_getattr - Read out an inode's attributes
+ * @mnt: The vfsmount the inode is being accessed from
+ * @dentry: The dentry to stat
+ * @stat: The inode's stats
+ *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done. Note that its the NFS
+ * readdirplus operation which causes this to be called (from filldir)
+ * with the glock already held.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int error;
+	int unlock = 0;
+
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error)
+			return error;
+		unlock = 1;
+	}
+
+	generic_fillattr(inode, stat);
+	if (unlock)
+		gfs2_glock_dq_uninit(&gh);
+
+	return 0;
+}
+
+static int gfs2_setxattr(struct dentry *dentry, const char *name,
+			 const void *data, size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret == 0) {
+		ret = generic_setxattr(dentry, name, data, size, flags);
+		gfs2_glock_dq(&gh);
+	}
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
+			     void *data, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret == 0) {
+		ret = generic_getxattr(dentry, name, data, size);
+		gfs2_glock_dq(&gh);
+	}
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static int gfs2_removexattr(struct dentry *dentry, const char *name)
+{
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret == 0) {
+		ret = generic_removexattr(dentry, name);
+		gfs2_glock_dq(&gh);
+	}
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 start, u64 len)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (ret)
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		u64 phys = ip->i_no_addr << inode->i_blkbits;
+		u64 size = i_size_read(inode);
+		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+			    FIEMAP_EXTENT_DATA_INLINE;
+		phys += sizeof(struct gfs2_dinode);
+		phys += start;
+		if (start + len > size)
+			len = size - start;
+		if (start < size)
+			ret = fiemap_fill_next_extent(fieinfo, start, phys,
+						      len, flags);
+		if (ret == 1)
+			ret = 0;
+	} else {
+		ret = __generic_block_fiemap(inode, fieinfo, start, len,
+					     gfs2_block_map);
+	}
+
+	gfs2_glock_dq_uninit(&gh);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
+const struct inode_operations gfs2_file_iops = {
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
+};
+
+const struct inode_operations gfs2_dir_iops = {
+	.create = gfs2_create,
+	.lookup = gfs2_lookup,
+	.link = gfs2_link,
+	.unlink = gfs2_unlink,
+	.symlink = gfs2_symlink,
+	.mkdir = gfs2_mkdir,
+	.rmdir = gfs2_unlink,
+	.mknod = gfs2_mknod,
+	.rename = gfs2_rename,
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
+};
+
+const struct inode_operations gfs2_symlink_iops = {
+	.readlink = generic_readlink,
+	.follow_link = gfs2_follow_link,
+	.put_link = gfs2_put_link,
+	.permission = gfs2_permission,
+	.setattr = gfs2_setattr,
+	.getattr = gfs2_getattr,
+	.setxattr = gfs2_setxattr,
+	.getxattr = gfs2_getxattr,
+	.listxattr = gfs2_listxattr,
+	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
+};
+
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3e00a66e7cbd..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -97,26 +97,21 @@ err:
 }
 
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-				       u64 no_addr, u64 no_formal_ino);
+				       u64 no_addr, u64 no_formal_ino,
+				       int non_block);
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
 					 u64 *no_formal_ino,
 					 unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
 
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
 
-extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
-extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 				  int is_root);
-extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
-				  const struct qstr *name,
-				  unsigned int mode, dev_t dev);
 extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
 extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern void gfs2_dinode_print(const struct gfs2_inode *ip);
 
 extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..cec26c00b50d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/bio.h>
+#include <linux/writeback.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 /**
  * gfs2_ail1_start_one - Start I/O on a part of the AIL
  * @sdp: the filesystem
- * @tr: the part of the AIL
+ * @wbc: The writeback control structure
+ * @ai: The ail structure
  *
  */
 
-static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
+			       struct writeback_control *wbc,
+			       struct gfs2_ail *ai)
 __releases(&sdp->sd_ail_lock)
 __acquires(&sdp->sd_ail_lock)
 {
+	struct gfs2_glock *gl = NULL;
+	struct address_space *mapping;
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
-	int retry;
 
-	do {
-		retry = 0;
+	list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
+		bh = bd->bd_bh;
 
-		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
-						 bd_ail_st_list) {
-			bh = bd->bd_bh;
+		gfs2_assert(sdp, bd->bd_ail == ai);
 
-			gfs2_assert(sdp, bd->bd_ail == ai);
+		if (!buffer_busy(bh)) {
+			if (!buffer_uptodate(bh))
+				gfs2_io_error_bh(sdp, bh);
+			list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+			continue;
+		}
 
-			if (!buffer_busy(bh)) {
-				if (!buffer_uptodate(bh))
-					gfs2_io_error_bh(sdp, bh);
-				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
-				continue;
-			}
+		if (!buffer_dirty(bh))
+			continue;
+		if (gl == bd->bd_gl)
+			continue;
+		gl = bd->bd_gl;
+		list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+		mapping = bh->b_page->mapping;
+		if (!mapping)
+			continue;
+		spin_unlock(&sdp->sd_ail_lock);
+		generic_writepages(mapping, wbc);
+		spin_lock(&sdp->sd_ail_lock);
+		if (wbc->nr_to_write <= 0)
+			break;
+		return 1;
+	}
 
-			if (!buffer_dirty(bh))
-				continue;
+	return 0;
+}
 
-			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 
-			get_bh(bh);
-			spin_unlock(&sdp->sd_ail_lock);
-			lock_buffer(bh);
-			if (test_clear_buffer_dirty(bh)) {
-				bh->b_end_io = end_buffer_write_sync;
-				submit_bh(WRITE_SYNC, bh);
-			} else {
-				unlock_buffer(bh);
-				brelse(bh);
-			}
-			spin_lock(&sdp->sd_ail_lock);
-
-			retry = 1;
+/**
+ * gfs2_ail1_flush - start writeback of some ail1 entries
+ * @sdp: The superblock
+ * @wbc: The writeback control structure
+ *
+ * Walks sd_ail1_list in reverse under sd_ail_lock, starting writeback until
+ * wbc->nr_to_write is used up or a full pass over the list starts no I/O.
+ */
+
+void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
+{
+	struct list_head *head = &sdp->sd_ail1_list;
+	struct gfs2_ail *ai;
+
+	trace_gfs2_ail_flush(sdp, wbc, 1);
+	spin_lock(&sdp->sd_ail_lock);
+restart:
+	list_for_each_entry_reverse(ai, head, ai_list) {
+		if (wbc->nr_to_write <= 0)
 			break;
-		}
-	} while (retry);
+		if (gfs2_ail1_start_one(sdp, wbc, ai))
+			goto restart; /* ail lock was dropped; rescan the list */
+	}
+	spin_unlock(&sdp->sd_ail_lock);
+	trace_gfs2_ail_flush(sdp, wbc, 0);
+}
+
+/**
+ * gfs2_ail1_start - start writeback of all ail1 entries
+ * @sdp: The superblock
+ */
+
+static void gfs2_ail1_start(struct gfs2_sbd *sdp)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = LONG_MAX, /* effectively no write limit */
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+	};
+
+	gfs2_ail1_flush(sdp, &wbc);
 }
 
 /**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
  *
  */
 
-static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
+static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 {
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
@@ -149,71 +192,37 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 	list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
 					 bd_ail_st_list) {
 		bh = bd->bd_bh;
-
 		gfs2_assert(sdp, bd->bd_ail == ai);
-
-		if (buffer_busy(bh)) {
-			if (flags & DIO_ALL)
-				continue;
-			else
-				break;
-		}
-
+		if (buffer_busy(bh))
+			continue;
 		if (!buffer_uptodate(bh))
 			gfs2_io_error_bh(sdp, bh);
-
 		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
 	}
 
-	return list_empty(&ai->ai_ail1_list);
 }
 
-static void gfs2_ail1_start(struct gfs2_sbd *sdp)
-{
-	struct list_head *head;
-	u64 sync_gen;
-	struct gfs2_ail *ai;
-	int done = 0;
-
-	spin_lock(&sdp->sd_ail_lock);
-	head = &sdp->sd_ail1_list;
-	if (list_empty(head)) {
-		spin_unlock(&sdp->sd_ail_lock);
-		return;
-	}
-	sync_gen = sdp->sd_ail_sync_gen++;
-
-	while(!done) {
-		done = 1;
-		list_for_each_entry_reverse(ai, head, ai_list) {
-			if (ai->ai_sync_gen >= sync_gen)
-				continue;
-			ai->ai_sync_gen = sync_gen;
-			gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
-			done = 0;
-			break;
-		}
-	}
-
-	spin_unlock(&sdp->sd_ail_lock);
-}
+/**
+ * gfs2_ail1_empty - Try to empty the ail1 lists
+ * @sdp: The superblock
+ *
+ * Tries to empty the ail1 lists, starting with the oldest first
+ */
 
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 {
 	struct gfs2_ail *ai, *s;
 	int ret;
 
 	spin_lock(&sdp->sd_ail_lock);
-
 	list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
-		if (gfs2_ail1_empty_one(sdp, ai, flags))
+		gfs2_ail1_empty_one(sdp, ai);
+		if (list_empty(&ai->ai_ail1_list))
 			list_move(&ai->ai_list, &sdp->sd_ail2_list);
-		else if (!(flags & DIO_ALL))
+		else
 			break;
 	}
-
 	ret = list_empty(&sdp->sd_ail1_list);
-
 	spin_unlock(&sdp->sd_ail_lock);
 
 	return ret;
@@ -574,7 +583,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	set_buffer_uptodate(bh);
 	clear_buffer_dirty(bh);
 
-	gfs2_ail1_empty(sdp, 0);
+	gfs2_ail1_empty(sdp);
 	tail = current_tail(sdp);
 
 	lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,7 +878,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
 	gfs2_log_flush(sdp, NULL);
 	for (;;) {
 		gfs2_ail1_start(sdp);
-		if (gfs2_ail1_empty(sdp, DIO_ALL))
+		if (gfs2_ail1_empty(sdp))
 			break;
 		msleep(10);
 	}
@@ -905,17 +914,15 @@ int gfs2_logd(void *data)
 
 		preflush = atomic_read(&sdp->sd_log_pinned);
 		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
-			gfs2_ail1_empty(sdp, DIO_ALL);
+			gfs2_ail1_empty(sdp);
 			gfs2_log_flush(sdp, NULL);
-			gfs2_ail1_empty(sdp, DIO_ALL);
 		}
 
 		if (gfs2_ail_flush_reqd(sdp)) {
 			gfs2_ail1_start(sdp);
 			io_schedule();
-			gfs2_ail1_empty(sdp, 0);
+			gfs2_ail1_empty(sdp);
 			gfs2_log_flush(sdp, NULL);
-			gfs2_ail1_empty(sdp, DIO_ALL);
 		}
 
 		wake_up(&sdp->sd_log_waitq);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/writeback.h>
 #include "incore.h"
 
 /**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
 
 extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
 extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 {
 	struct gfs2_bufdata *bd;
 
-	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+	BUG_ON(!current->journal_info);
 
 	clear_buffer_dirty(bh);
 	if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
  * @sdp: the filesystem the buffer belongs to
  * @bh: The buffer to unpin
  * @ai:
+ * @flags: The inode dirty flags
  *
  */
 
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 {
 	struct gfs2_bufdata *bd = bh->b_private;
 
-	gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
-
-	if (!buffer_pinned(bh))
-		gfs2_assert_withdraw(sdp, 0);
+	BUG_ON(!buffer_uptodate(bh));
+	BUG_ON(!buffer_pinned(bh));
 
 	lock_buffer(bh);
 	mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 	spin_unlock(&sdp->sd_ail_lock);
 
-	if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
-		gfs2_glock_schedule_for_reclaim(bd->bd_gl);
+	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
 	trace_gfs2_pin(bd, 0);
 	unlock_buffer(bh);
 	atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 
 static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
+	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+	struct gfs2_glock *gl = bd->bd_gl;
 	struct gfs2_trans *tr;
 
 	tr = current->journal_info;
 	tr->tr_touched = 1;
 	tr->tr_num_revoke++;
 	sdp->sd_log_num_revoke++;
+	atomic_inc(&gl->gl_revokes);
+	set_bit(GLF_LFLUSH, &gl->gl_flags);
 	list_add(&le->le_list, &sdp->sd_log_le_revoke);
 }
 
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
 	offset = sizeof(struct gfs2_log_descriptor);
 
-	while (!list_empty(head)) {
-		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
-		list_del_init(&bd->bd_le.le_list);
+	list_for_each_entry(bd, head, bd_le.le_list) {
 		sdp->sd_log_num_revoke--;
 
 		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 
 		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
-		kmem_cache_free(gfs2_bufdata_cachep, bd);
-
 		offset += sizeof(u64);
 	}
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 	submit_bh(WRITE_SYNC, bh);
 }
 
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &sdp->sd_log_le_revoke;
+	struct gfs2_bufdata *bd;
+	struct gfs2_glock *gl;
+
+	while (!list_empty(head)) { /* drain the whole revoke list */
+		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+		list_del_init(&bd->bd_le.le_list);
+		gl = bd->bd_gl;
+		atomic_dec(&gl->gl_revokes); /* undoes revoke_lo_add()'s inc */
+		clear_bit(GLF_LFLUSH, &gl->gl_flags);
+		kmem_cache_free(gfs2_bufdata_cachep, bd); /* bd is gone now */
+	}
+}
+
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
 				  struct gfs2_log_header_host *head, int pass)
 {
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
 const struct gfs2_log_operations gfs2_revoke_lops = {
 	.lo_add = revoke_lo_add,
 	.lo_before_commit = revoke_lo_before_commit,
+	.lo_after_commit = revoke_lo_after_commit,
 	.lo_before_scan = revoke_lo_before_scan,
 	.lo_scan_elements = revoke_lo_scan_elements,
 	.lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
 	INIT_LIST_HEAD(&gl->gl_lru);
 	INIT_LIST_HEAD(&gl->gl_ail_list);
 	atomic_set(&gl->gl_ail_count, 0);
+	atomic_set(&gl->gl_revokes, 0);
 }
 
 static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
 	struct gfs2_bufdata *bd = bh->b_private;
 
 	if (test_clear_buffer_pinned(bh)) {
+		trace_gfs2_pin(bd, 0);
 		atomic_dec(&sdp->sd_log_pinned);
 		list_del_init(&bd->bd_le.le_list);
 		if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
 
 #define buffer_busy(bh) \
 ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
-#define buffer_in_io(bh) \
-((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
 
 #endif /* __DIO_DOT_H__ */
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 42ef24355afb..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
  * changed.
  */
 
-static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
+static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
 {
+	struct gfs2_sb_host *sb = &sdp->sd_sb;
+
 	if (sb->sb_magic != GFS2_MAGIC ||
 	    sb->sb_type != GFS2_METATYPE_SB) {
 		if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
 	unlock_page(page);
 }
 
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 {
+	struct gfs2_sb_host *sb = &sdp->sd_sb;
+	struct super_block *s = sdp->sd_vfs;
 	const struct gfs2_sb *str = buf;
 
 	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 
 	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-	memcpy(sb->sb_uuid, str->sb_uuid, 16);
+	memcpy(s->s_uuid, str->sb_uuid, 16);
 }
 
 /**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
  * Returns: 0 on success or error
  */
 
-static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 		return -EIO;
 	}
 	p = kmap(page);
-	gfs2_sb_in(&sdp->sd_sb, p);
+	gfs2_sb_in(sdp, p);
 	kunmap(page);
 	__free_page(page);
-	return 0;
+	return gfs2_check_sb(sdp, silent);
 }
 
 /**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
 	unsigned int x;
 	int error;
 
-	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
 	if (error) {
 		if (!silent)
 			fs_err(sdp, "can't read superblock\n");
 		return error;
 	}
 
-	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
-	if (error)
-		return error;
-
 	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
 			       GFS2_BASIC_BLOCK_SHIFT;
 	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	/*  Try to autodetect  */
 
 	if (!proto[0] || !table[0]) {
-		error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+		error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
 		if (error)
 			return error;
 
-		error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
-		if (error)
-			goto out;
-
 		if (!proto[0])
 			proto = sdp->sd_sb.sb_lockproto;
 		if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	while ((table = strchr(table, '/')))
 		*table = '_';
 
-out:
 	return error;
 }
 
@@ -430,7 +425,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
 	struct dentry *dentry;
 	struct inode *inode;
 
-	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
 	if (IS_ERR(inode)) {
 		fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
 		return PTR_ERR(inode);
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	if (sdp->sd_args.ar_statfs_quantum) {
 		sdp->sd_tune.gt_statfs_slow = 0;
 		sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
-	}
-	else {
+	} else {
 		sdp->sd_tune.gt_statfs_slow = 1;
 		sdp->sd_tune.gt_statfs_quantum = 30;
 	}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/namei.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-#include <linux/fiemap.h>
-#include <asm/uaccess.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "acl.h"
-#include "bmap.h"
-#include "dir.h"
-#include "xattr.h"
-#include "glock.h"
-#include "inode.h"
-#include "meta_io.h"
-#include "quota.h"
-#include "rgrp.h"
-#include "trans.h"
-#include "util.h"
-#include "super.h"
-
-/**
- * gfs2_create - Create a file
- * @dir: The directory in which to create the file
- * @dentry: The dentry of the new file
- * @mode: The mode of the new file
- *
- * Returns: errno
- */
-
-static int gfs2_create(struct inode *dir, struct dentry *dentry,
-		       int mode, struct nameidata *nd)
-{
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_holder ghs[2];
-	struct inode *inode;
-
-	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
-	for (;;) {
-		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
-		if (!IS_ERR(inode)) {
-			gfs2_trans_end(sdp);
-			if (dip->i_alloc->al_rgd)
-				gfs2_inplace_release(dip);
-			gfs2_quota_unlock(dip);
-			gfs2_alloc_put(dip);
-			gfs2_glock_dq_uninit_m(2, ghs);
-			mark_inode_dirty(inode);
-			break;
-		} else if (PTR_ERR(inode) != -EEXIST ||
-			   (nd && nd->flags & LOOKUP_EXCL)) {
-			gfs2_holder_uninit(ghs);
-			return PTR_ERR(inode);
-		}
-
-		inode = gfs2_lookupi(dir, &dentry->d_name, 0);
-		if (inode) {
-			if (!IS_ERR(inode)) {
-				gfs2_holder_uninit(ghs);
-				break;
-			} else {
-				gfs2_holder_uninit(ghs);
-				return PTR_ERR(inode);
-			}
-		}
-	}
-
-	d_instantiate(dentry, inode);
-
-	return 0;
-}
-
-/**
- * gfs2_lookup - Look up a filename in a directory and return its inode
- * @dir: The directory inode
- * @dentry: The dentry of the new inode
- * @nd: passed from Linux VFS, ignored by us
- *
- * Called by the VFS layer. Lock dir and call gfs2_lookupi()
- *
- * Returns: errno
- */
-
-static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
-{
-	struct inode *inode = NULL;
-
-	inode = gfs2_lookupi(dir, &dentry->d_name, 0);
-	if (inode && IS_ERR(inode))
-		return ERR_CAST(inode);
-
-	if (inode) {
-		struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-		struct gfs2_holder gh;
-		int error;
-		error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-		if (error) {
-			iput(inode);
-			return ERR_PTR(error);
-		}
-		gfs2_glock_dq_uninit(&gh);
-		return d_splice_alias(inode, dentry);
-	}
-	d_add(dentry, inode);
-
-	return NULL;
-}
-
-/**
- * gfs2_link - Link to a file
- * @old_dentry: The inode to link
- * @dir: Add link to this directory
- * @dentry: The name of the link
- *
- * Link the inode in "old_dentry" into the directory "dir" with the
- * name in "dentry".
- *
- * Returns: errno
- */
-
-static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
-		     struct dentry *dentry)
-{
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct inode *inode = old_dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder ghs[2];
-	int alloc_required;
-	int error;
-
-	if (S_ISDIR(inode->i_mode))
-		return -EPERM;
-
-	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-
-	error = gfs2_glock_nq(ghs); /* parent */
-	if (error)
-		goto out_parent;
-
-	error = gfs2_glock_nq(ghs + 1); /* child */
-	if (error)
-		goto out_child;
-
-	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
-	if (error)
-		goto out_gunlock;
-
-	error = gfs2_dir_check(dir, &dentry->d_name, NULL);
-	switch (error) {
-	case -ENOENT:
-		break;
-	case 0:
-		error = -EEXIST;
-	default:
-		goto out_gunlock;
-	}
-
-	error = -EINVAL;
-	if (!dip->i_inode.i_nlink)
-		goto out_gunlock;
-	error = -EFBIG;
-	if (dip->i_entries == (u32)-1)
-		goto out_gunlock;
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out_gunlock;
-	error = -EINVAL;
-	if (!ip->i_inode.i_nlink)
-		goto out_gunlock;
-	error = -EMLINK;
-	if (ip->i_inode.i_nlink == (u32)-1)
-		goto out_gunlock;
-
-	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
-	if (error < 0)
-		goto out_gunlock;
-	error = 0;
-
-	if (alloc_required) {
-		struct gfs2_alloc *al = gfs2_alloc_get(dip);
-		if (!al) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
-		error = gfs2_quota_lock_check(dip);
-		if (error)
-			goto out_alloc;
-
-		al->al_requested = sdp->sd_max_dirres;
-
-		error = gfs2_inplace_reserve(dip);
-		if (error)
-			goto out_gunlock_q;
-
-		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
-					 2 * RES_DINODE + RES_STATFS +
-					 RES_QUOTA, 0);
-		if (error)
-			goto out_ipres;
-	} else {
-		error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
-		if (error)
-			goto out_ipres;
-	}
-
-	error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
-	if (error)
-		goto out_end_trans;
-
-	error = gfs2_change_nlink(ip, +1);
-
-out_end_trans:
-	gfs2_trans_end(sdp);
-out_ipres:
-	if (alloc_required)
-		gfs2_inplace_release(dip);
-out_gunlock_q:
-	if (alloc_required)
-		gfs2_quota_unlock(dip);
-out_alloc:
-	if (alloc_required)
-		gfs2_alloc_put(dip);
-out_gunlock:
-	gfs2_glock_dq(ghs + 1);
-out_child:
-	gfs2_glock_dq(ghs);
-out_parent:
-	gfs2_holder_uninit(ghs);
-	gfs2_holder_uninit(ghs + 1);
-	if (!error) {
-		ihold(inode);
-		d_instantiate(dentry, inode);
-		mark_inode_dirty(inode);
-	}
-	return error;
-}
-
-/*
- * gfs2_unlink_ok - check to see that a inode is still in a directory
- * @dip: the directory
- * @name: the name of the file
- * @ip: the inode
- *
- * Assumes that the lock on (at least) @dip is held.
- *
- * Returns: 0 if the parent/child relationship is correct, errno if it isn't
- */
-
-static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-			  const struct gfs2_inode *ip)
-{
-	int error;
-
-	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
-		return -EPERM;
-
-	if ((dip->i_inode.i_mode & S_ISVTX) &&
-	    dip->i_inode.i_uid != current_fsuid() &&
-	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
-		return -EPERM;
-
-	if (IS_APPEND(&dip->i_inode))
-		return -EPERM;
-
-	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
-	if (error)
-		return error;
-
-	error = gfs2_dir_check(&dip->i_inode, name, ip);
-	if (error)
-		return error;
-
-	return 0;
-}
-
-/**
- * gfs2_unlink - Unlink a file
- * @dir: The inode of the directory containing the file to unlink
- * @dentry: The file itself
- *
- * Unlink a file.  Call gfs2_unlinki()
- *
- * Returns: errno
- */
-
-static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
-{
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-	struct gfs2_holder ghs[3];
-	struct gfs2_rgrpd *rgd;
-	struct gfs2_holder ri_gh;
-	int error;
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
-
-	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
-
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-
-
-	error = gfs2_glock_nq(ghs); /* parent */
-	if (error)
-		goto out_parent;
-
-	error = gfs2_glock_nq(ghs + 1); /* child */
-	if (error)
-		goto out_child;
-
-	error = gfs2_glock_nq(ghs + 2); /* rgrp */
-	if (error)
-		goto out_rgrp;
-
-	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
-	if (error)
-		goto out_gunlock;
-
-	error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
-	if (error)
-		goto out_gunlock;
-
-	error = gfs2_dir_del(dip, &dentry->d_name);
-        if (error)
-                goto out_end_trans;
-
-	error = gfs2_change_nlink(ip, -1);
-
-out_end_trans:
-	gfs2_trans_end(sdp);
-out_gunlock:
-	gfs2_glock_dq(ghs + 2);
-out_rgrp:
-	gfs2_holder_uninit(ghs + 2);
-	gfs2_glock_dq(ghs + 1);
-out_child:
-	gfs2_holder_uninit(ghs + 1);
-	gfs2_glock_dq(ghs);
-out_parent:
-	gfs2_holder_uninit(ghs);
-	gfs2_glock_dq_uninit(&ri_gh);
-	return error;
-}
-
-/**
- * gfs2_symlink - Create a symlink
- * @dir: The directory to create the symlink in
- * @dentry: The dentry to put the symlink in
- * @symname: The thing which the link points to
- *
- * Returns: errno
- */
-
-static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
-			const char *symname)
-{
-	struct gfs2_inode *dip = GFS2_I(dir), *ip;
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_holder ghs[2];
-	struct inode *inode;
-	struct buffer_head *dibh;
-	int size;
-	int error;
-
-	/* Must be stuffed with a null terminator for gfs2_follow_link() */
-	size = strlen(symname);
-	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
-		return -ENAMETOOLONG;
-
-	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
-	if (IS_ERR(inode)) {
-		gfs2_holder_uninit(ghs);
-		return PTR_ERR(inode);
-	}
-
-	ip = ghs[1].gh_gl->gl_object;
-
-	i_size_write(inode, size);
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-
-	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(ip, dibh->b_data);
-		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
-		       size);
-		brelse(dibh);
-	}
-
-	gfs2_trans_end(sdp);
-	if (dip->i_alloc->al_rgd)
-		gfs2_inplace_release(dip);
-	gfs2_quota_unlock(dip);
-	gfs2_alloc_put(dip);
-
-	gfs2_glock_dq_uninit_m(2, ghs);
-
-	d_instantiate(dentry, inode);
-	mark_inode_dirty(inode);
-
-	return 0;
-}
-
-/**
- * gfs2_mkdir - Make a directory
- * @dir: The parent directory of the new one
- * @dentry: The dentry of the new directory
- * @mode: The mode of the new directory
- *
- * Returns: errno
- */
-
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-	struct gfs2_inode *dip = GFS2_I(dir), *ip;
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_holder ghs[2];
-	struct inode *inode;
-	struct buffer_head *dibh;
-	int error;
-
-	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
-	if (IS_ERR(inode)) {
-		gfs2_holder_uninit(ghs);
-		return PTR_ERR(inode);
-	}
-
-	ip = ghs[1].gh_gl->gl_object;
-
-	ip->i_inode.i_nlink = 2;
-	i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
-	ip->i_diskflags |= GFS2_DIF_JDATA;
-	ip->i_entries = 2;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-
-	if (!gfs2_assert_withdraw(sdp, !error)) {
-		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
-		struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
-
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
-		dent->de_inum = di->di_num; /* already GFS2 endian */
-		dent->de_type = cpu_to_be16(DT_DIR);
-		di->di_entries = cpu_to_be32(1);
-
-		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
-		gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
-
-		gfs2_inum_out(dip, dent);
-		dent->de_type = cpu_to_be16(DT_DIR);
-
-		gfs2_dinode_out(ip, di);
-
-		brelse(dibh);
-	}
-
-	error = gfs2_change_nlink(dip, +1);
-	gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
-
-	gfs2_trans_end(sdp);
-	if (dip->i_alloc->al_rgd)
-		gfs2_inplace_release(dip);
-	gfs2_quota_unlock(dip);
-	gfs2_alloc_put(dip);
-
-	gfs2_glock_dq_uninit_m(2, ghs);
-
-	d_instantiate(dentry, inode);
-	mark_inode_dirty(inode);
-
-	return 0;
-}
-
-/**
- * gfs2_rmdiri - Remove a directory
- * @dip: The parent directory of the directory to be removed
- * @name: The name of the directory to be removed
- * @ip: The GFS2 inode of the directory to be removed
- *
- * Assumes Glocks on dip and ip are held
- *
- * Returns: errno
- */
-
-static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		       struct gfs2_inode *ip)
-{
-	int error;
-
-	if (ip->i_entries != 2) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-
-	error = gfs2_dir_del(dip, name);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(dip, -1);
-	if (error)
-		return error;
-
-	error = gfs2_dir_del(ip, &gfs2_qdot);
-	if (error)
-		return error;
-
-	error = gfs2_dir_del(ip, &gfs2_qdotdot);
-	if (error)
-		return error;
-
-	/* It looks odd, but it really should be done twice */
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	return error;
-}
-
-/**
- * gfs2_rmdir - Remove a directory
- * @dir: The parent directory of the directory to be removed
- * @dentry: The dentry of the directory to remove
- *
- * Remove a directory. Call gfs2_rmdiri()
- *
- * Returns: errno
- */
-
-static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
-{
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-	struct gfs2_holder ghs[3];
-	struct gfs2_rgrpd *rgd;
-	struct gfs2_holder ri_gh;
-	int error;
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
-	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-
-	error = gfs2_glock_nq(ghs); /* parent */
-	if (error)
-		goto out_parent;
-
-	error = gfs2_glock_nq(ghs + 1); /* child */
-	if (error)
-		goto out_child;
-
-	error = gfs2_glock_nq(ghs + 2); /* rgrp */
-	if (error)
-		goto out_rgrp;
-
-	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
-	if (error)
-		goto out_gunlock;
-
-	if (ip->i_entries < 2) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		error = -EIO;
-		goto out_gunlock;
-	}
-	if (ip->i_entries > 2) {
-		error = -ENOTEMPTY;
-		goto out_gunlock;
-	}
-
-	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
-	if (error)
-		goto out_gunlock;
-
-	error = gfs2_rmdiri(dip, &dentry->d_name, ip);
-
-	gfs2_trans_end(sdp);
-
-out_gunlock:
-	gfs2_glock_dq(ghs + 2);
-out_rgrp:
-	gfs2_holder_uninit(ghs + 2);
-	gfs2_glock_dq(ghs + 1);
-out_child:
-	gfs2_holder_uninit(ghs + 1);
-	gfs2_glock_dq(ghs);
-out_parent:
-	gfs2_holder_uninit(ghs);
-	gfs2_glock_dq_uninit(&ri_gh);
-	return error;
-}
-
-/**
- * gfs2_mknod - Make a special file
- * @dir: The directory in which the special file will reside
- * @dentry: The dentry of the special file
- * @mode: The mode of the special file
- * @rdev: The device specification of the special file
- *
- */
-
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
-		      dev_t dev)
-{
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	struct gfs2_holder ghs[2];
-	struct inode *inode;
-
-	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-
-	inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
-	if (IS_ERR(inode)) {
-		gfs2_holder_uninit(ghs);
-		return PTR_ERR(inode);
-	}
-
-	gfs2_trans_end(sdp);
-	if (dip->i_alloc->al_rgd)
-		gfs2_inplace_release(dip);
-	gfs2_quota_unlock(dip);
-	gfs2_alloc_put(dip);
-
-	gfs2_glock_dq_uninit_m(2, ghs);
-
-	d_instantiate(dentry, inode);
-	mark_inode_dirty(inode);
-
-	return 0;
-}
-
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-
-static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
-	struct inode *dir = &to->i_inode;
-	struct super_block *sb = dir->i_sb;
-	struct inode *tmp;
-	int error = 0;
-
-	igrab(dir);
-
-	for (;;) {
-		if (dir == &this->i_inode) {
-			error = -EINVAL;
-			break;
-		}
-		if (dir == sb->s_root->d_inode) {
-			error = 0;
-			break;
-		}
-
-		tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
-		if (IS_ERR(tmp)) {
-			error = PTR_ERR(tmp);
-			break;
-		}
-
-		iput(dir);
-		dir = tmp;
-	}
-
-	iput(dir);
-
-	return error;
-}
-
-/**
- * gfs2_rename - Rename a file
- * @odir: Parent directory of old file name
- * @odentry: The old dentry of the file
- * @ndir: Parent directory of new file name
- * @ndentry: The new dentry of the file
- *
- * Returns: errno
- */
-
-static int gfs2_rename(struct inode *odir, struct dentry *odentry,
-		       struct inode *ndir, struct dentry *ndentry)
-{
-	struct gfs2_inode *odip = GFS2_I(odir);
-	struct gfs2_inode *ndip = GFS2_I(ndir);
-	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
-	struct gfs2_inode *nip = NULL;
-	struct gfs2_sbd *sdp = GFS2_SB(odir);
-	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
-	struct gfs2_rgrpd *nrgd;
-	unsigned int num_gh;
-	int dir_rename = 0;
-	int alloc_required = 0;
-	unsigned int x;
-	int error;
-
-	if (ndentry->d_inode) {
-		nip = GFS2_I(ndentry->d_inode);
-		if (ip == nip)
-			return 0;
-	}
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
-
-	if (odip != ndip) {
-		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
-					   0, &r_gh);
-		if (error)
-			goto out;
-
-		if (S_ISDIR(ip->i_inode.i_mode)) {
-			dir_rename = 1;
-			/* don't move a dirctory into it's subdir */
-			error = gfs2_ok_to_move(ip, ndip);
-			if (error)
-				goto out_gunlock_r;
-		}
-	}
-
-	num_gh = 1;
-	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-	if (odip != ndip) {
-		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-		num_gh++;
-	}
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-	num_gh++;
-
-	if (nip) {
-		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-		num_gh++;
-		/* grab the resource lock for unlink flag twiddling 
-		 * this is the case of the target file already existing
-		 * so we unlink before doing the rename
-		 */
-		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
-		if (nrgd)
-			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
-	}
-
-	for (x = 0; x < num_gh; x++) {
-		error = gfs2_glock_nq(ghs + x);
-		if (error)
-			goto out_gunlock;
-	}
-
-	/* Check out the old directory */
-
-	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
-	if (error)
-		goto out_gunlock;
-
-	/* Check out the new directory */
-
-	if (nip) {
-		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
-		if (error)
-			goto out_gunlock;
-
-		if (S_ISDIR(nip->i_inode.i_mode)) {
-			if (nip->i_entries < 2) {
-				if (gfs2_consist_inode(nip))
-					gfs2_dinode_print(nip);
-				error = -EIO;
-				goto out_gunlock;
-			}
-			if (nip->i_entries > 2) {
-				error = -ENOTEMPTY;
-				goto out_gunlock;
-			}
-		}
-	} else {
-		error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
-		if (error)
-			goto out_gunlock;
-
-		error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
-		switch (error) {
-		case -ENOENT:
-			error = 0;
-			break;
-		case 0:
-			error = -EEXIST;
-		default:
-			goto out_gunlock;
-		};
-
-		if (odip != ndip) {
-			if (!ndip->i_inode.i_nlink) {
-				error = -EINVAL;
-				goto out_gunlock;
-			}
-			if (ndip->i_entries == (u32)-1) {
-				error = -EFBIG;
-				goto out_gunlock;
-			}
-			if (S_ISDIR(ip->i_inode.i_mode) &&
-			    ndip->i_inode.i_nlink == (u32)-1) {
-				error = -EMLINK;
-				goto out_gunlock;
-			}
-		}
-	}
-
-	/* Check out the dir to be renamed */
-
-	if (dir_rename) {
-		error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
-		if (error)
-			goto out_gunlock;
-	}
-
-	if (nip == NULL)
-		alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
-	error = alloc_required;
-	if (error < 0)
-		goto out_gunlock;
-	error = 0;
-
-	if (alloc_required) {
-		struct gfs2_alloc *al = gfs2_alloc_get(ndip);
-		if (!al) {
-			error = -ENOMEM;
-			goto out_gunlock;
-		}
-
-		error = gfs2_quota_lock_check(ndip);
-		if (error)
-			goto out_alloc;
-
-		al->al_requested = sdp->sd_max_dirres;
-
-		error = gfs2_inplace_reserve_ri(ndip);
-		if (error)
-			goto out_gunlock_q;
-
-		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
-					 4 * RES_DINODE + 4 * RES_LEAF +
-					 RES_STATFS + RES_QUOTA + 4, 0);
-		if (error)
-			goto out_ipreserv;
-	} else {
-		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
-					 5 * RES_LEAF + 4, 0);
-		if (error)
-			goto out_gunlock;
-	}
-
-	/* Remove the target file, if it exists */
-
-	if (nip) {
-		if (S_ISDIR(nip->i_inode.i_mode))
-			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
-		else {
-			error = gfs2_dir_del(ndip, &ndentry->d_name);
-			if (error)
-				goto out_end_trans;
-			error = gfs2_change_nlink(nip, -1);
-		}
-		if (error)
-			goto out_end_trans;
-	}
-
-	if (dir_rename) {
-		error = gfs2_change_nlink(ndip, +1);
-		if (error)
-			goto out_end_trans;
-		error = gfs2_change_nlink(odip, -1);
-		if (error)
-			goto out_end_trans;
-
-		error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
-		if (error)
-			goto out_end_trans;
-	} else {
-		struct buffer_head *dibh;
-		error = gfs2_meta_inode_buffer(ip, &dibh);
-		if (error)
-			goto out_end_trans;
-		ip->i_inode.i_ctime = CURRENT_TIME;
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
-
-	error = gfs2_dir_del(odip, &odentry->d_name);
-	if (error)
-		goto out_end_trans;
-
-	error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
-	if (error)
-		goto out_end_trans;
-
-out_end_trans:
-	gfs2_trans_end(sdp);
-out_ipreserv:
-	if (alloc_required)
-		gfs2_inplace_release(ndip);
-out_gunlock_q:
-	if (alloc_required)
-		gfs2_quota_unlock(ndip);
-out_alloc:
-	if (alloc_required)
-		gfs2_alloc_put(ndip);
-out_gunlock:
-	while (x--) {
-		gfs2_glock_dq(ghs + x);
-		gfs2_holder_uninit(ghs + x);
-	}
-out_gunlock_r:
-	if (r_gh.gh_gl)
-		gfs2_glock_dq_uninit(&r_gh);
-out:
-	gfs2_glock_dq_uninit(&ri_gh);
-	return error;
-}
-
-/**
- * gfs2_follow_link - Follow a symbolic link
- * @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
- *
- * This can handle symlinks of any size.
- *
- * Returns: 0 on success or error code
- */
-
-static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-	struct gfs2_holder i_gh;
-	struct buffer_head *dibh;
-	unsigned int x, size;
-	char *buf;
-	int error;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		nd_set_link(nd, ERR_PTR(error));
-		return NULL;
-	}
-
-	size = (unsigned int)i_size_read(&ip->i_inode);
-	if (size == 0) {
-		gfs2_consist_inode(ip);
-		buf = ERR_PTR(-EIO);
-		goto out;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error) {
-		buf = ERR_PTR(error);
-		goto out;
-	}
-
-	x = size + 1;
-	buf = kmalloc(x, GFP_NOFS);
-	if (!buf)
-		buf = ERR_PTR(-ENOMEM);
-	else
-		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
-	brelse(dibh);
-out:
-	gfs2_glock_dq_uninit(&i_gh);
-	nd_set_link(nd, buf);
-	return NULL;
-}
-
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
-/**
- * gfs2_permission -
- * @inode: The inode
- * @mask: The mask to be tested
- * @flags: Indicates whether this is an RCU path walk or not
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done.
- *
- * Returns: errno
- */
-
-int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
-{
-	struct gfs2_inode *ip;
-	struct gfs2_holder i_gh;
-	int error;
-	int unlock = 0;
-
-
-	ip = GFS2_I(inode);
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		if (flags & IPERM_FLAG_RCU)
-			return -ECHILD;
-		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-		if (error)
-			return error;
-		unlock = 1;
-	}
-
-	if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
-		error = -EACCES;
-	else
-		error = generic_permission(inode, mask, flags, gfs2_check_acl);
-	if (unlock)
-		gfs2_glock_dq_uninit(&i_gh);
-
-	return error;
-}
-
-static int setattr_chown(struct inode *inode, struct iattr *attr)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	u32 ouid, ogid, nuid, ngid;
-	int error;
-
-	ouid = inode->i_uid;
-	ogid = inode->i_gid;
-	nuid = attr->ia_uid;
-	ngid = attr->ia_gid;
-
-	if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
-		ouid = nuid = NO_QUOTA_CHANGE;
-	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
-		ogid = ngid = NO_QUOTA_CHANGE;
-
-	if (!gfs2_alloc_get(ip))
-		return -ENOMEM;
-
-	error = gfs2_quota_lock(ip, nuid, ngid);
-	if (error)
-		goto out_alloc;
-
-	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-		error = gfs2_quota_check(ip, nuid, ngid);
-		if (error)
-			goto out_gunlock_q;
-	}
-
-	error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
-	if (error)
-		goto out_gunlock_q;
-
-	error = gfs2_setattr_simple(ip, attr);
-	if (error)
-		goto out_end_trans;
-
-	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-		u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
-		gfs2_quota_change(ip, -blocks, ouid, ogid);
-		gfs2_quota_change(ip, blocks, nuid, ngid);
-	}
-
-out_end_trans:
-	gfs2_trans_end(sdp);
-out_gunlock_q:
-	gfs2_quota_unlock(ip);
-out_alloc:
-	gfs2_alloc_put(ip);
-	return error;
-}
-
-/**
- * gfs2_setattr - Change attributes on an inode
- * @dentry: The dentry which is changing
- * @attr: The structure describing the change
- *
- * The VFS layer wants to change one or more of an inodes attributes.  Write
- * that change out to disk.
- *
- * Returns: errno
- */
-
-static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
-{
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder i_gh;
-	int error;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
-	if (error)
-		return error;
-
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out;
-
-	error = inode_change_ok(inode, attr);
-	if (error)
-		goto out;
-
-	if (attr->ia_valid & ATTR_SIZE)
-		error = gfs2_setattr_size(inode, attr->ia_size);
-	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
-		error = setattr_chown(inode, attr);
-	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
-		error = gfs2_acl_chmod(ip, attr);
-	else
-		error = gfs2_setattr_simple(ip, attr);
-
-out:
-	gfs2_glock_dq_uninit(&i_gh);
-	if (!error)
-		mark_inode_dirty(inode);
-	return error;
-}
-
-/**
- * gfs2_getattr - Read out an inode's attributes
- * @mnt: The vfsmount the inode is being accessed from
- * @dentry: The dentry to stat
- * @stat: The inode's stats
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done. Note that its the NFS
- * readdirplus operation which causes this to be called (from filldir)
- * with the glock already held.
- *
- * Returns: errno
- */
-
-static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
-			struct kstat *stat)
-{
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int error;
-	int unlock = 0;
-
-	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-		if (error)
-			return error;
-		unlock = 1;
-	}
-
-	generic_fillattr(inode, stat);
-	if (unlock)
-		gfs2_glock_dq_uninit(&gh);
-
-	return 0;
-}
-
-static int gfs2_setxattr(struct dentry *dentry, const char *name,
-			 const void *data, size_t size, int flags)
-{
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (ret == 0) {
-		ret = generic_setxattr(dentry, name, data, size, flags);
-		gfs2_glock_dq(&gh);
-	}
-	gfs2_holder_uninit(&gh);
-	return ret;
-}
-
-static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
-			     void *data, size_t size)
-{
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (ret == 0) {
-		ret = generic_getxattr(dentry, name, data, size);
-		gfs2_glock_dq(&gh);
-	}
-	gfs2_holder_uninit(&gh);
-	return ret;
-}
-
-static int gfs2_removexattr(struct dentry *dentry, const char *name)
-{
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (ret == 0) {
-		ret = generic_removexattr(dentry, name);
-		gfs2_glock_dq(&gh);
-	}
-	gfs2_holder_uninit(&gh);
-	return ret;
-}
-
-static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-		       u64 start, u64 len)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
-	if (ret)
-		return ret;
-
-	mutex_lock(&inode->i_mutex);
-
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	if (ret)
-		goto out;
-
-	if (gfs2_is_stuffed(ip)) {
-		u64 phys = ip->i_no_addr << inode->i_blkbits;
-		u64 size = i_size_read(inode);
-		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
-			    FIEMAP_EXTENT_DATA_INLINE;
-		phys += sizeof(struct gfs2_dinode);
-		phys += start;
-		if (start + len > size)
-			len = size - start;
-		if (start < size)
-			ret = fiemap_fill_next_extent(fieinfo, start, phys,
-						      len, flags);
-		if (ret == 1)
-			ret = 0;
-	} else {
-		ret = __generic_block_fiemap(inode, fieinfo, start, len,
-					     gfs2_block_map);
-	}
-
-	gfs2_glock_dq_uninit(&gh);
-out:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-const struct inode_operations gfs2_file_iops = {
-	.permission = gfs2_permission,
-	.setattr = gfs2_setattr,
-	.getattr = gfs2_getattr,
-	.setxattr = gfs2_setxattr,
-	.getxattr = gfs2_getxattr,
-	.listxattr = gfs2_listxattr,
-	.removexattr = gfs2_removexattr,
-	.fiemap = gfs2_fiemap,
-};
-
-const struct inode_operations gfs2_dir_iops = {
-	.create = gfs2_create,
-	.lookup = gfs2_lookup,
-	.link = gfs2_link,
-	.unlink = gfs2_unlink,
-	.symlink = gfs2_symlink,
-	.mkdir = gfs2_mkdir,
-	.rmdir = gfs2_rmdir,
-	.mknod = gfs2_mknod,
-	.rename = gfs2_rename,
-	.permission = gfs2_permission,
-	.setattr = gfs2_setattr,
-	.getattr = gfs2_getattr,
-	.setxattr = gfs2_setxattr,
-	.getxattr = gfs2_getxattr,
-	.listxattr = gfs2_listxattr,
-	.removexattr = gfs2_removexattr,
-	.fiemap = gfs2_fiemap,
-};
-
-const struct inode_operations gfs2_symlink_iops = {
-	.readlink = generic_readlink,
-	.follow_link = gfs2_follow_link,
-	.put_link = gfs2_put_link,
-	.permission = gfs2_permission,
-	.setattr = gfs2_setattr,
-	.getattr = gfs2_getattr,
-	.setxattr = gfs2_setxattr,
-	.getxattr = gfs2_getxattr,
-	.listxattr = gfs2_listxattr,
-	.removexattr = gfs2_removexattr,
-	.fiemap = gfs2_fiemap,
-};
-
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cf930cd9664a..7273ad3c85ba 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 
 static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
 			       unsigned char *buf2, unsigned int offset,
-			       unsigned int buflen, u32 block,
+			       struct gfs2_bitmap *bi, u32 block,
 			       unsigned char new_state)
 {
 	unsigned char *byte1, *byte2, *end, cur_state;
+	unsigned int buflen = bi->bi_len;
 	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
 
 	byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
 	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 
 	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
+		printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
+		       "new_state=%d\n",
+		       (unsigned long long)block, cur_state, new_state);
+		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
+		       (unsigned long long)rgd->rd_addr,
+		       (unsigned long)bi->bi_start);
+		printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
+		       (unsigned long)bi->bi_offset,
+		       (unsigned long)bi->bi_len);
+		dump_stack();
 		gfs2_consist_rgrpd(rgd);
 		return;
 	}
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
 
 		if (gl) {
 			gl->gl_object = NULL;
+			gfs2_glock_add_to_lru(gl);
 			gfs2_glock_put(gl);
 		}
 
@@ -945,7 +957,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 		/* rgblk_search can return a block < goal, so we need to
 		   keep it marching forward. */
 		no_addr = block + rgd->rd_data0;
-		goal++;
+		goal = max(block + 1, goal + 1);
 		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
 			continue;
 		if (no_addr == skip)
@@ -971,7 +983,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 			found++;
 
 		/* Limit reclaim to sensible number of tasks */
-		if (found > 2*NR_CPUS)
+		if (found > NR_CPUS)
 			return;
 	}
 
@@ -1365,7 +1377,7 @@ skip:
 
 	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
 	gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-		    bi->bi_len, blk, new_state);
+		    bi, blk, new_state);
 	goal = blk;
 	while (*n < elen) {
 		goal++;
@@ -1375,7 +1387,7 @@ skip:
 		    GFS2_BLKST_FREE)
 			break;
 		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-			    bi->bi_len, goal, new_state);
+			    bi, goal, new_state);
 		(*n)++;
 	}
 out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 		}
 		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
 		gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
-			    bi->bi_len, buf_blk, new_state);
+			    bi, buf_blk, new_state);
 	}
 
 	return rgd;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a4e23d68a398..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
 #include <linux/time.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
+#include <linux/backing-dev.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
 	mutex_unlock(&sdp->sd_freeze_lock);
 }
 
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+	struct gfs2_dinode *str = buf;
+
+	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
+	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+	str->di_goal_meta = cpu_to_be64(ip->i_goal);
+	str->di_goal_data = cpu_to_be64(ip->i_goal);
+	str->di_generation = cpu_to_be64(ip->i_generation);
+
+	str->di_flags = cpu_to_be32(ip->i_diskflags);
+	str->di_height = cpu_to_be16(ip->i_height);
+	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
+					     GFS2_FORMAT_DE : 0);
+	str->di_depth = cpu_to_be16(ip->i_depth);
+	str->di_entries = cpu_to_be32(ip->i_entries);
+
+	str->di_eattr = cpu_to_be64(ip->i_eattr);
+	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
 
 /**
  * gfs2_write_inode - Make sure the inode is stable on the disk
  * @inode: The inode
- * @sync: synchronous write flag
+ * @wbc: The writeback control structure
  *
  * Returns: errno
  */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+	struct backing_dev_info *bdi = metamapping->backing_dev_info;
 	struct gfs2_holder gh;
 	struct buffer_head *bh;
 	struct timespec atime;
 	struct gfs2_dinode *di;
-	int ret = 0;
+	int ret = -EAGAIN;
 
-	/* Check this is a "normal" inode, etc */
+	/* Skip timestamp update, if this is from a memalloc */
 	if (current->flags & PF_MEMALLOC)
-		return 0;
+		goto do_flush;
 	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (ret)
 		goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
 do_flush:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+	filemap_fdatawrite(metamapping);
+	if (bdi->dirty_exceeded)
+		gfs2_ail1_flush(sdp, wbc);
+	if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
+		ret = filemap_fdatawait(metamapping);
+	if (ret)
+		mark_inode_dirty_sync(inode);
 	return ret;
 }
 
@@ -874,8 +920,9 @@ restart:
 
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
-	if (wait && sb->s_fs_info)
-		gfs2_log_flush(sb->s_fs_info, NULL);
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	if (wait && sdp)
+		gfs2_log_flush(sdp, NULL);
 	return 0;
 }
 
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	return 0;
 }
 
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+	struct inode *inode = &ip->i_inode;
+	struct gfs2_glock *gl = ip->i_gl;
+
+	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (atomic_read(&gl->gl_revokes) == 0) {
+		clear_bit(GLF_LFLUSH, &gl->gl_flags);
+		clear_bit(GLF_DIRTY, &gl->gl_flags);
+	}
+}
+
+static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_alloc *al;
+	struct gfs2_rgrpd *rgd;
+	int error;
+
+	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
+		gfs2_consist_inode(ip);
+		return -EIO;
+	}
+
+	al = gfs2_alloc_get(ip);
+	if (!al)
+		return -ENOMEM;
+
+	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+	if (error)
+		goto out;
+
+	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+	if (error)
+		goto out_qs;
+
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+	if (!rgd) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out_rindex_relse;
+	}
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+				   &al->al_rgd_gh);
+	if (error)
+		goto out_rindex_relse;
+
+	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+				 sdp->sd_jdesc->jd_blocks);
+	if (error)
+		goto out_rg_gunlock;
+
+	gfs2_free_di(rgd, ip);
+
+	gfs2_final_release_pages(ip);
+
+	gfs2_trans_end(sdp);
+
+out_rg_gunlock:
+	gfs2_glock_dq_uninit(&al->al_rgd_gh);
+out_rindex_relse:
+	gfs2_glock_dq_uninit(&al->al_ri_gh);
+out_qs:
+	gfs2_quota_unhold(ip);
+out:
+	gfs2_alloc_put(ip);
+	return error;
+}
+
 /*
  * We have to (at the moment) hold the inodes main lock to cover
  * the gap between unlocking the shared lock on the iopen lock and
@@ -1318,15 +1437,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
 static void gfs2_evict_inode(struct inode *inode)
 {
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct super_block *sb = inode->i_sb;
+	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
 
-	if (inode->i_nlink)
+	if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	/* Must not read inode block until block type has been verified */
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
@@ -1336,6 +1457,12 @@ static void gfs2_evict_inode(struct inode *inode)
 	if (error)
 		goto out_truncate;
 
+	if (test_bit(GIF_INVALID, &ip->i_flags)) {
+		error = gfs2_inode_refresh(ip);
+		if (error)
+			goto out_truncate;
+	}
+
 	ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
 	gfs2_glock_dq_wait(&ip->i_iopen_gh);
 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
@@ -1363,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
 	}
 
 	error = gfs2_dinode_dealloc(ip);
-	if (error)
-		goto out_unlock;
+	goto out_unlock;
 
 out_truncate:
 	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 	if (error)
 		goto out_unlock;
-	/* Needs to be done before glock release & also in a transaction */
-	truncate_inode_pages(&inode->i_data, 0);
+	gfs2_final_release_pages(ip);
 	gfs2_trans_end(sdp);
 
 out_unlock:
@@ -1386,6 +1511,7 @@ out:
 	end_writeback(inode);
 
 	ip->i_gl->gl_object = NULL;
+	gfs2_glock_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
 	if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
 
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
-	const u8 *uuid = sdp->sd_sb.sb_uuid;
+	struct super_block *s = sdp->sd_vfs;
+	const u8 *uuid = s->s_uuid;
 	buf[0] = '\0';
 	if (!gfs2_uuid_valid(uuid))
 		return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 		       struct kobj_uevent_env *env)
 {
 	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-	const u8 *uuid = sdp->sd_sb.sb_uuid;
+	struct super_block *s = sdp->sd_vfs;
+	const u8 *uuid = s->s_uuid;
 
 	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
 	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
 #include <linux/buffer_head.h>
 #include <linux/dlmconstants.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/writeback.h>
 #include "incore.h"
 #include "glock.h"
 
@@ -40,7 +41,9 @@
 	{(1UL << GLF_REPLY_PENDING),		"r" },		\
 	{(1UL << GLF_INITIAL),			"I" },		\
 	{(1UL << GLF_FROZEN),			"F" },		\
-	{(1UL << GLF_QUEUED),			"q" })
+	{(1UL << GLF_QUEUED),			"q" },		\
+	{(1UL << GLF_LRU),			"L" },		\
+	{(1UL << GLF_OBJECT),			"o" })
 
 #ifndef NUMPTY
 #define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
 		__entry->new_state	= glock_trace_state(new_state);
 		__entry->tgt_state	= glock_trace_state(gl->gl_target);
 		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
-		__entry->flags		= gl->gl_flags;
+		__entry->flags		= gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
 	),
 
 	TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
 		__entry->gltype		= gl->gl_name.ln_type;
 		__entry->glnum		= gl->gl_name.ln_number;
 		__entry->cur_state	= glock_trace_state(gl->gl_state);
-		__entry->flags		= gl->gl_flags;
+		__entry->flags		= gl->gl_flags  | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
 	),
 
 	TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
 		__entry->glnum		= gl->gl_name.ln_number;
 		__entry->cur_state	= glock_trace_state(gl->gl_state);
 		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
-		__entry->flags		= gl->gl_flags;
+		__entry->flags		= gl->gl_flags  | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
 	),
 
 	TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
 		  MINOR(__entry->dev), __entry->blocks)
 );
 
+/* Writing back the AIL */
+TRACE_EVENT(gfs2_ail_flush,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
+
+	TP_ARGS(sdp, wbc, start),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	int, start			)
+		__field(	int, sync_mode			)
+		__field(	long, nr_to_write		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sdp->sd_vfs->s_dev;
+		__entry->start		= start;
+		__entry->sync_mode	= wbc->sync_mode;
+		__entry->nr_to_write	= wbc->nr_to_write;
+	),
+
+	TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->start ? "start" : "end",
+		  __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
+		  __entry->nr_to_write)
+);
+
 /* Section 3 - bmap
  *
  * Objectives:
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 0c39dc3ef7d7..56bd15c5bf6c 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,6 @@
 config HPFS_FS
 	tristate "OS/2 HPFS file system support"
 	depends on BLOCK
-	depends on BROKEN || !PREEMPT
 	help
 	  OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
 	  is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index 5503e2c28910..7a5eb2c718c8 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,8 +8,6 @@
 
 #include "hpfs_fn.h"
 
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
-
 /*
  * Check if a sector is allocated in bitmap
  * This is really slow. Turned on only if chk==2
@@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
 static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
 {
 	struct quad_buffer_head qbh;
-	unsigned *bmp;
+	u32 *bmp;
 	if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail;
-	if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) {
+	if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) {
 		hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec);
 		goto fail1;
 	}
@@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
 	if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) {
 		unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4;
 		if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail;
-		if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) {
+		if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) {
 			hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec);
 			goto fail1;
 		}
@@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
 		hpfs_error(s, "Bad allocation size: %d", n);
 		return 0;
 	}
-	lock_super(s);
 	if (bs != ~0x3fff) {
 		if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls;
 	} else {
@@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
 		ret = bs + nr;
 		goto rt;
 	}
-	/*if (!tstbits(bmp, nr + n, n + forward)) {
-		ret = bs + nr + n;
-		goto rt;
-	}*/
 	q = nr + n; b = 0;
 	while ((a = tstbits(bmp, q, n + forward)) != 0) {
 		q += a;
@@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
 		goto rt;
 	}
 	nr >>= 5;
-	/*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/
+	/*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */
 	i = nr;
 	do {
-		if (!bmp[i]) goto cont;
-		if (n + forward >= 0x3f && bmp[i] != -1) goto cont;
+		if (!le32_to_cpu(bmp[i])) goto cont;
+		if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont;
 		q = i<<5;
 		if (i > 0) {
-			unsigned k = bmp[i-1];
+			unsigned k = le32_to_cpu(bmp[i-1]);
 			while (k & 0x80000000) {
 				q--; k <<= 1;
 			}
@@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
 	} while (i != nr);
 	rt:
 	if (ret) {
-		if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
+		if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
 			hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret);
 			ret = 0;
 			goto b;
 		}
-		bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f));
+		bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f)));
 		hpfs_mark_4buffers_dirty(&qbh);
 	}
 	b:
 	hpfs_brelse4(&qbh);
 	uls:
-	unlock_super(s);
 	return ret;
 }
 
@@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
  *				sectors
  */
 
-secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock)
+secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward)
 {
 	secno sec;
 	int i;
@@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
 		forward = -forward;
 		f_p = 1;
 	}
-	if (lock) hpfs_lock_creation(s);
 	n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14;
 	if (near && near < sbi->sb_fs_size) {
 		if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret;
@@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
 	ret:
 	if (sec && f_p) {
 		for (i = 0; i < forward; i++) {
-			if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) {
+			if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
 				hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
 				sec = 0;
 				break;
 			}
 		}
 	}
-	if (lock) hpfs_unlock_creation(s);
 	return sec;
 }
 
-static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
+static secno alloc_in_dirband(struct super_block *s, secno near)
 {
 	unsigned nr = near;
 	secno sec;
@@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
 		nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4;
 	nr -= sbi->sb_dirband_start;
 	nr >>= 2;
-	if (lock) hpfs_lock_creation(s);
 	sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
-	if (lock) hpfs_unlock_creation(s);
 	if (!sec) return 0;
 	return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
 }
 
 /* Alloc sector if it's free */
 
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec)
+int hpfs_alloc_if_possible(struct super_block *s, secno sec)
 {
 	struct quad_buffer_head qbh;
-	unsigned *bmp;
-	lock_super(s);
+	u32 *bmp;
 	if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end;
-	if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) {
-		bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f));
+	if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1U << (sec & 0x1f))) {
+		bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1U << (sec & 0x1f)));
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
-		unlock_super(s);
 		return 1;
 	}
 	hpfs_brelse4(&qbh);
 	end:
-	unlock_super(s);
 	return 0;
 }
 
-int hpfs_alloc_if_possible(struct super_block *s, secno sec)
-{
-	int r;
-	hpfs_lock_creation(s);
-	r = hpfs_alloc_if_possible_nolock(s, sec);
-	hpfs_unlock_creation(s);
-	return r;
-}
-
 /* Free sectors in bitmaps */
 
 void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
 {
 	struct quad_buffer_head qbh;
-	unsigned *bmp;
+	u32 *bmp;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	/*printk("2 - ");*/
 	if (!n) return;
@@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
 		hpfs_error(s, "Trying to free reserved sector %08x", sec);
 		return;
 	}
-	lock_super(s);
 	sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n;
 	if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff;
 	new_map:
 	if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) {
-		unlock_super(s);
 		return;
 	}	
 	new_tst:
-	if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) {
+	if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) {
 		hpfs_error(s, "sector %08x not allocated", sec);
 		hpfs_brelse4(&qbh);
-		unlock_super(s);
 		return;
 	}
-	bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f);
+	bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1U << (sec & 0x1f));
 	if (!--n) {
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
-		unlock_super(s);
 		return;
 	}	
 	if (!(++sec & 0x3fff)) {
@@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
 	int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14;
 	int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff;
 	int i, j;
-	unsigned *bmp;
+	u32 *bmp;
 	struct quad_buffer_head qbh;
 	if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
 		for (j = 0; j < 512; j++) {
 			unsigned k;
-			if (!bmp[j]) continue;
-			for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) {
+			if (!le32_to_cpu(bmp[j])) continue;
+			for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) {
 				hpfs_brelse4(&qbh);
 				return 0;
 			}
@@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
 	chk_bmp:
 	if (bmp) {
 		for (j = 0; j < 512; j++) {
-			unsigned k;
-			if (!bmp[j]) continue;
+			u32 k;
+			if (!le32_to_cpu(bmp[j])) continue;
 			for (k = 0xf; k; k <<= 4)
-				if ((bmp[j] & k) == k) {
+				if ((le32_to_cpu(bmp[j]) & k) == k) {
 					if (!--n) {
 						hpfs_brelse4(&qbh);
 						return 0;
@@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
 		hpfs_free_sectors(s, dno, 4);
 	} else {
 		struct quad_buffer_head qbh;
-		unsigned *bmp;
+		u32 *bmp;
 		unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4;
-		lock_super(s);
 		if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
-			unlock_super(s);
 			return;
 		}
-		bmp[ssec >> 5] |= 1 << (ssec & 0x1f);
+		bmp[ssec >> 5] |= cpu_to_le32(1U << (ssec & 0x1f));
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
-		unlock_super(s);
 	}
 }
 
 struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
-			 dnode_secno *dno, struct quad_buffer_head *qbh,
-			 int lock)
+			 dnode_secno *dno, struct quad_buffer_head *qbh)
 {
 	struct dnode *d;
 	if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
-		if (!(*dno = alloc_in_dirband(s, near, lock)))
-			if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL;
+		if (!(*dno = alloc_in_dirband(s, near)))
+			if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
 	} else {
-		if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock)))
-			if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL;
+		if (!(*dno = hpfs_alloc_sector(s, near, 4, 0)))
+			if (!(*dno = alloc_in_dirband(s, near))) return NULL;
 	}
 	if (!(d = hpfs_get_4sectors(s, *dno, qbh))) {
 		hpfs_free_dnode(s, *dno);
 		return NULL;
 	}
 	memset(d, 0, 2048);
-	d->magic = DNODE_MAGIC;
-	d->first_free = 52;
+	d->magic = cpu_to_le32(DNODE_MAGIC);
+	d->first_free = cpu_to_le32(52);
 	d->dirent[0] = 32;
 	d->dirent[2] = 8;
 	d->dirent[30] = 1;
 	d->dirent[31] = 255;
-	d->self = *dno;
+	d->self = cpu_to_le32(*dno);
 	return d;
 }
 
@@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f
 			  struct buffer_head **bh)
 {
 	struct fnode *f;
-	if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL;
+	if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL;
 	if (!(f = hpfs_get_sector(s, *fno, bh))) {
 		hpfs_free_sectors(s, *fno, 1);
 		return NULL;
 	}	
 	memset(f, 0, 512);
-	f->magic = FNODE_MAGIC;
-	f->ea_offs = 0xc4;
+	f->magic = cpu_to_le32(FNODE_MAGIC);
+	f->ea_offs = cpu_to_le16(0xc4);
 	f->btree.n_free_nodes = 8;
-	f->btree.first_free = 8;
+	f->btree.first_free = cpu_to_le16(8);
 	return f;
 }
 
@@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
 			  struct buffer_head **bh)
 {
 	struct anode *a;
-	if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL;
+	if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL;
 	if (!(a = hpfs_get_sector(s, *ano, bh))) {
 		hpfs_free_sectors(s, *ano, 1);
 		return NULL;
 	}
 	memset(a, 0, 512);
-	a->magic = ANODE_MAGIC;
-	a->self = *ano;
+	a->magic = cpu_to_le32(ANODE_MAGIC);
+	a->self = cpu_to_le32(*ano);
 	a->btree.n_free_nodes = 40;
 	a->btree.n_used_nodes = 0;
-	a->btree.first_free = 8;
+	a->btree.first_free = cpu_to_le16(8);
 	return a;
 }
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 6a2f04bf3df0..08b503e8ed29 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
 	if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1;
 	if (btree->internal) {
 		for (i = 0; i < btree->n_used_nodes; i++)
-			if (btree->u.internal[i].file_secno > sec) {
-				a = btree->u.internal[i].down;
+			if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) {
+				a = le32_to_cpu(btree->u.internal[i].down);
 				brelse(bh);
 				if (!(anode = hpfs_map_anode(s, a, &bh))) return -1;
 				btree = &anode->btree;
@@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
 		return -1;
 	}
 	for (i = 0; i < btree->n_used_nodes; i++)
-		if (btree->u.external[i].file_secno <= sec &&
-		    btree->u.external[i].file_secno + btree->u.external[i].length > sec) {
-			a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno;
+		if (le32_to_cpu(btree->u.external[i].file_secno) <= sec &&
+		    le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) {
+			a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno);
 			if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) {
 				brelse(bh);
 				return -1;
 			}
 			if (inode) {
 				struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
-				hpfs_inode->i_file_sec = btree->u.external[i].file_secno;
-				hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno;
-				hpfs_inode->i_n_secs = btree->u.external[i].length;
+				hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno);
+				hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno);
+				hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length);
 			}
 			brelse(bh);
 			return a;
@@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		return -1;
 	}
 	if (btree->internal) {
-		a = btree->u.internal[n].down;
-		btree->u.internal[n].file_secno = -1;
+		a = le32_to_cpu(btree->u.internal[n].down);
+		btree->u.internal[n].file_secno = cpu_to_le32(-1);
 		mark_buffer_dirty(bh);
 		brelse(bh);
 		if (hpfs_sb(s)->sb_chk)
@@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		goto go_down;
 	}
 	if (n >= 0) {
-		if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) {
+		if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) {
 			hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x",
-				btree->u.external[n].file_secno + btree->u.external[n].length, fsecno,
+				le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno,
 				fnod?'f':'a', node);
 			brelse(bh);
 			return -1;
 		}
-		if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) {
-			btree->u.external[n].length++;
+		if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
+			btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1);
 			mark_buffer_dirty(bh);
 			brelse(bh);
 			return se;
@@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		}
 		se = !fnod ? node : (node + 16384) & ~16383;
 	}	
-	if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) {
+	if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) {
 		brelse(bh);
 		return -1;
 	}
-	fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length;
+	fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length);
 	if (!btree->n_free_nodes) {
-		up = a != node ? anode->up : -1;
+		up = a != node ? le32_to_cpu(anode->up) : -1;
 		if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) {
 			brelse(bh);
 			hpfs_free_sectors(s, se, 1);
 			return -1;
 		}
 		if (a == node && fnod) {
-			anode->up = node;
+			anode->up = cpu_to_le32(node);
 			anode->btree.fnode_parent = 1;
 			anode->btree.n_used_nodes = btree->n_used_nodes;
 			anode->btree.first_free = btree->first_free;
@@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 			btree->internal = 1;
 			btree->n_free_nodes = 11;
 			btree->n_used_nodes = 1;
-			btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree;
-			btree->u.internal[0].file_secno = -1;
-			btree->u.internal[0].down = na;
+			btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree);
+			btree->u.internal[0].file_secno = cpu_to_le32(-1);
+			btree->u.internal[0].down = cpu_to_le32(na);
 			mark_buffer_dirty(bh);
 		} else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) {
 			brelse(bh);
@@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		btree = &anode->btree;
 	}
 	btree->n_free_nodes--; n = btree->n_used_nodes++;
-	btree->first_free += 12;
-	btree->u.external[n].disk_secno = se;
-	btree->u.external[n].file_secno = fs;
-	btree->u.external[n].length = 1;
+	btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12);
+	btree->u.external[n].disk_secno = cpu_to_le32(se);
+	btree->u.external[n].file_secno = cpu_to_le32(fs);
+	btree->u.external[n].length = cpu_to_le32(1);
 	mark_buffer_dirty(bh);
 	brelse(bh);
 	if ((a == node && fnod) || na == -1) return se;
 	c2 = 0;
-	while (up != -1) {
+	while (up != (anode_secno)-1) {
 		struct anode *new_anode;
 		if (hpfs_sb(s)->sb_chk)
 			if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1;
@@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		}
 		if (btree->n_free_nodes) {
 			btree->n_free_nodes--; n = btree->n_used_nodes++;
-			btree->first_free += 8;
-			btree->u.internal[n].file_secno = -1;
-			btree->u.internal[n].down = na;
-			btree->u.internal[n-1].file_secno = fs;
+			btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8);
+			btree->u.internal[n].file_secno = cpu_to_le32(-1);
+			btree->u.internal[n].down = cpu_to_le32(na);
+			btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
 			mark_buffer_dirty(bh);
 			brelse(bh);
 			brelse(bh2);
 			hpfs_free_sectors(s, ra, 1);
 			if ((anode = hpfs_map_anode(s, na, &bh))) {
-				anode->up = up;
+				anode->up = cpu_to_le32(up);
 				anode->btree.fnode_parent = up == node && fnod;
 				mark_buffer_dirty(bh);
 				brelse(bh);
 			}
 			return se;
 		}
-		up = up != node ? anode->up : -1;
-		btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1;
+		up = up != node ? le32_to_cpu(anode->up) : -1;
+		btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1);
 		mark_buffer_dirty(bh);
 		brelse(bh);
 		a = na;
 		if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) {
 			anode = new_anode;
-			/*anode->up = up != -1 ? up : ra;*/
+			/*anode->up = cpu_to_le32(up != -1 ? up : ra);*/
 			anode->btree.internal = 1;
 			anode->btree.n_used_nodes = 1;
 			anode->btree.n_free_nodes = 59;
-			anode->btree.first_free = 16;
-			anode->btree.u.internal[0].down = a;
-			anode->btree.u.internal[0].file_secno = -1;
+			anode->btree.first_free = cpu_to_le16(16);
+			anode->btree.u.internal[0].down = cpu_to_le32(a);
+			anode->btree.u.internal[0].file_secno = cpu_to_le32(-1);
 			mark_buffer_dirty(bh);
 			brelse(bh);
 			if ((anode = hpfs_map_anode(s, a, &bh))) {
-				anode->up = na;
+				anode->up = cpu_to_le32(na);
 				mark_buffer_dirty(bh);
 				brelse(bh);
 			}
 		} else na = a;
 	}
 	if ((anode = hpfs_map_anode(s, na, &bh))) {
-		anode->up = node;
+		anode->up = cpu_to_le32(node);
 		if (fnod) anode->btree.fnode_parent = 1;
 		mark_buffer_dirty(bh);
 		brelse(bh);
@@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 		}
 		btree = &fnode->btree;
 	}
-	ranode->up = node;
-	memcpy(&ranode->btree, btree, btree->first_free);
+	ranode->up = cpu_to_le32(node);
+	memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free));
 	if (fnod) ranode->btree.fnode_parent = 1;
 	ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes;
 	if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) {
 		struct anode *unode;
-		if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) {
-			unode->up = ra;
+		if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) {
+			unode->up = cpu_to_le32(ra);
 			unode->btree.fnode_parent = 0;
 			mark_buffer_dirty(bh1);
 			brelse(bh1);
@@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
 	btree->internal = 1;
 	btree->n_free_nodes = fnod ? 10 : 58;
 	btree->n_used_nodes = 2;
-	btree->first_free = (char *)&btree->u.internal[2] - (char *)btree;
-	btree->u.internal[0].file_secno = fs;
-	btree->u.internal[0].down = ra;
-	btree->u.internal[1].file_secno = -1;
-	btree->u.internal[1].down = na;
+	btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree);
+	btree->u.internal[0].file_secno = cpu_to_le32(fs);
+	btree->u.internal[0].down = cpu_to_le32(ra);
+	btree->u.internal[1].file_secno = cpu_to_le32(-1);
+	btree->u.internal[1].down = cpu_to_le32(na);
 	mark_buffer_dirty(bh);
 	brelse(bh);
 	mark_buffer_dirty(bh2);
@@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
 	go_down:
 	d2 = 0;
 	while (btree1->internal) {
-		ano = btree1->u.internal[pos].down;
+		ano = le32_to_cpu(btree1->u.internal[pos].down);
 		if (level) brelse(bh);
 		if (hpfs_sb(s)->sb_chk)
 			if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1"))
@@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
 		pos = 0;
 	}
 	for (i = 0; i < btree1->n_used_nodes; i++)
-		hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length);
+		hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length));
 	go_up:
 	if (!level) return;
 	brelse(bh);
@@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
 		if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
 	hpfs_free_sectors(s, ano, 1);
 	oano = ano;
-	ano = anode->up;
+	ano = le32_to_cpu(anode->up);
 	if (--level) {
 		if (!(anode = hpfs_map_anode(s, ano, &bh))) return;
 		btree1 = &anode->btree;
 	} else btree1 = btree;
 	for (i = 0; i < btree1->n_used_nodes; i++) {
-		if (btree1->u.internal[i].down == oano) {
+		if (le32_to_cpu(btree1->u.internal[i].down) == oano) {
 			if ((pos = i + 1) < btree1->n_used_nodes)
 				goto go_down;
 			else
@@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
 		if (fno) {
 			btree->n_free_nodes = 8;
 			btree->n_used_nodes = 0;
-			btree->first_free = 8;
+			btree->first_free = cpu_to_le16(8);
 			btree->internal = 0;
 			mark_buffer_dirty(bh);
 		} else hpfs_free_sectors(s, f, 1);
@@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
 	while (btree->internal) {
 		nodes = btree->n_used_nodes + btree->n_free_nodes;
 		for (i = 0; i < btree->n_used_nodes; i++)
-			if (btree->u.internal[i].file_secno >= secs) goto f;
+			if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f;
 		brelse(bh);
 		hpfs_error(s, "internal btree %08x doesn't end with -1", node);
 		return;
 		f:
 		for (j = i + 1; j < btree->n_used_nodes; j++)
-			hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0);
+			hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0);
 		btree->n_used_nodes = i + 1;
 		btree->n_free_nodes = nodes - btree->n_used_nodes;
-		btree->first_free = 8 + 8 * btree->n_used_nodes;
+		btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes);
 		mark_buffer_dirty(bh);
-		if (btree->u.internal[i].file_secno == secs) {
+		if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) {
 			brelse(bh);
 			return;
 		}
-		node = btree->u.internal[i].down;
+		node = le32_to_cpu(btree->u.internal[i].down);
 		brelse(bh);
 		if (hpfs_sb(s)->sb_chk)
 			if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree"))
@@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
 	}	
 	nodes = btree->n_used_nodes + btree->n_free_nodes;
 	for (i = 0; i < btree->n_used_nodes; i++)
-		if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff;
+		if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff;
 	brelse(bh);
 	return;
 	ff:
-	if (secs <= btree->u.external[i].file_secno) {
+	if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) {
 		hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs);
 		if (i) i--;
 	}
-	else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) {
-		hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs -
-			btree->u.external[i].file_secno, btree->u.external[i].length
-			- secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */
-		btree->u.external[i].length = secs - btree->u.external[i].file_secno;
+	else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) {
+		hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs -
+			le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length)
+			- secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */
+		btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno));
 	}
 	for (j = i + 1; j < btree->n_used_nodes; j++)
-		hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length);
+		hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length));
 	btree->n_used_nodes = i + 1;
 	btree->n_free_nodes = nodes - btree->n_used_nodes;
-	btree->first_free = 8 + 12 * btree->n_used_nodes;
+	btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes);
 	mark_buffer_dirty(bh);
 	brelse(bh);
 }
@@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno)
 	struct extended_attribute *ea_end;
 	if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return;
 	if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree);
-	else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno);
+	else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno));
 	ea_end = fnode_end_ea(fnode);
 	for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
 		if (ea->indirect)
 			hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
-	hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l);
+	hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l));
 	brelse(bh);
 	hpfs_free_sectors(s, fno, 1);
 }
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 793cb9d943d2..9ecde27d1e29 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -9,22 +9,6 @@
 #include <linux/slab.h>
 #include "hpfs_fn.h"
 
-void hpfs_lock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
-	printk("lock creation\n");
-#endif
-	mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
-}
-
-void hpfs_unlock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
-	printk("unlock creation\n");
-#endif
-	mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
-}
-
 /* Map a sector into a buffer and return pointers to it and to the buffer. */
 
 void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp,
@@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head
 {
 	struct buffer_head *bh;
 
+	hpfs_lock_assert(s);
+
 	cond_resched();
 
 	*bhp = bh = sb_bread(s, secno);
@@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head
 	struct buffer_head *bh;
 	/*return hpfs_map_sector(s, secno, bhp, 0);*/
 
+	hpfs_lock_assert(s);
+
 	cond_resched();
 
 	if ((*bhp = bh = sb_getblk(s, secno)) != NULL) {
@@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
 	struct buffer_head *bh;
 	char *data;
 
+	hpfs_lock_assert(s);
+
 	cond_resched();
 
 	if (secno & 3) {
@@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno,
 {
 	cond_resched();
 
+	hpfs_lock_assert(s);
+
 	if (secno & 3) {
 		printk("HPFS: hpfs_get_4sectors: unaligned read\n");
 		return NULL;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b3d7c0ddb609..f46ae025bfb5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
 					(unsigned long)inode->i_ino);
 		}
-		if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) {
+		if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
 			e = 1;
-			hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno);
+			hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno));
 		}
 		brelse(bh);
 		if (e) {
@@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			goto again;
 		}
 		tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
-		if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) {
+		if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) {
 			filp->f_pos = old_pos;
 			if (tempname != de->name) kfree(tempname);
 			hpfs_brelse4(&qbh);
@@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 	 * Get inode number, what we're after.
 	 */
 
-	ino = de->fnode;
+	ino = le32_to_cpu(de->fnode);
 
 	/*
 	 * Go find or make an inode.
@@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 		hpfs_init_inode(result);
 		if (de->directory)
 			hpfs_read_inode(result);
-		else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas)
+		else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas)
 			hpfs_read_inode(result);
 		else {
 			result->i_mode |= S_IFREG;
@@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 	hpfs_result = hpfs_i(result);
 	if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
 
-	hpfs_decide_conv(result, name, len);
-
 	if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
 		hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
 		goto bail1;
@@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
 	 */
 
 	if (!result->i_ctime.tv_sec) {
-		if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date)))
+		if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
 			result->i_ctime.tv_sec = 1;
 		result->i_ctime.tv_nsec = 0;
-		result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date);
+		result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
 		result->i_mtime.tv_nsec = 0;
-		result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date);
+		result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
 		result->i_atime.tv_nsec = 0;
-		hpfs_result->i_ea_size = de->ea_size;
+		hpfs_result->i_ea_size = le32_to_cpu(de->ea_size);
 		if (!hpfs_result->i_ea_mode && de->read_only)
 			result->i_mode &= ~0222;
 		if (!de->directory) {
 			if (result->i_size == -1) {
-				result->i_size = de->file_size;
+				result->i_size = le32_to_cpu(de->file_size);
 				result->i_data.a_ops = &hpfs_aops;
 				hpfs_i(result)->mmu_private = result->i_size;
 			/*
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 9b2ffadfc8c4..1e0e2ac30fd3 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde)
 	struct hpfs_dirent *de_end = dnode_end_de(d);
 	int i = 1;
 	for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
-		if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i;
+		if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i;
 		i++;
 	}
 	printk("HPFS: get_pos: not_found\n");
-	return ((loff_t)d->self << 4) | (loff_t)1;
+	return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1;
 }
 
 void hpfs_add_pos(struct inode *inode, loff_t *pos)
@@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
 {
 	struct hpfs_dirent *de;
 	if (!(de = dnode_last_de(d))) {
-		hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self);
+		hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self));
 		return;
 	}
 	if (hpfs_sb(s)->sb_chk) {
 		if (de->down) {
 			hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x",
-				d->self, de_down_pointer(de));
+				le32_to_cpu(d->self), de_down_pointer(de));
 			return;
 		}
-		if (de->length != 32) {
-			hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self);
+		if (le16_to_cpu(de->length) != 32) {
+			hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self));
 			return;
 		}
 	}
 	if (ptr) {
-		if ((d->first_free += 4) > 2048) {
-			hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self);
-			d->first_free -= 4;
+		d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4);
+		if (le32_to_cpu(d->first_free) > 2048) {
+			hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
+			d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4);
 			return;
 		}
-		de->length = 36;
+		de->length = cpu_to_le16(36);
 		de->down = 1;
-		*(dnode_secno *)((char *)de + 32) = ptr;
+		*(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr);
 	}
 }
 
@@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
 	for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
 		int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last);
 		if (!c) {
-			hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self);
+			hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self));
 			return NULL;
 		}
 		if (c < 0) break;
@@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
 	memmove((char *)de + d_size, de, (char *)de_end - (char *)de);
 	memset(de, 0, d_size);
 	if (down_ptr) {
-		*(int *)((char *)de + d_size - 4) = down_ptr;
+		*(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr);
 		de->down = 1;
 	}
-	de->length = d_size;
-	if (down_ptr) de->down = 1;
+	de->length = cpu_to_le16(d_size);
 	de->not_8x3 = hpfs_is_name_long(name, namelen);
 	de->namelen = namelen;
 	memcpy(de->name, name, namelen);
-	d->first_free += d_size;
+	d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size);
 	return de;
 }
 
@@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d,
 			   struct hpfs_dirent *de)
 {
 	if (de->last) {
-		hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self);
+		hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self));
 		return;
 	}
-	d->first_free -= de->length;
-	memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de);
+	d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length));
+	memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de);
 }
 
 static void fix_up_ptrs(struct super_block *s, struct dnode *d)
 {
 	struct hpfs_dirent *de;
 	struct hpfs_dirent *de_end = dnode_end_de(d);
-	dnode_secno dno = d->self;
+	dnode_secno dno = le32_to_cpu(d->self);
 	for (de = dnode_first_de(d); de < de_end; de = de_next_de(de))
 		if (de->down) {
 			struct quad_buffer_head qbh;
 			struct dnode *dd;
 			if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) {
-				if (dd->up != dno || dd->root_dnode) {
-					dd->up = dno;
+				if (le32_to_cpu(dd->up) != dno || dd->root_dnode) {
+					dd->up = cpu_to_le32(dno);
 					dd->root_dnode = 0;
 					hpfs_mark_4buffers_dirty(&qbh);
 				}
@@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 			kfree(nname);
 			return 1;
 		}
-	if (d->first_free + de_size(namelen, down_ptr) <= 2048) {
+	if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) {
 		loff_t t;
 		copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de);
 		t = get_pos(d, de);
@@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 		kfree(nname);
 		return 1;
 	}	
-	memcpy(nd, d, d->first_free);
+	memcpy(nd, d, le32_to_cpu(d->first_free));
 	copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de);
 	for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1);
 	h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10;
-	if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) {
+	if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) {
 		hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
 		hpfs_brelse4(&qbh);
 		kfree(nd);
@@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 	down_ptr = adno;
 	set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
 	de = de_next_de(de);
-	memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de);
-	nd->first_free -= (char *)de - (char *)nd - 20;
-	memcpy(d, nd, nd->first_free);
+	memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
+	nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20));
+	memcpy(d, nd, le32_to_cpu(nd->first_free));
 	for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
 	fix_up_ptrs(i->i_sb, ad);
 	if (!d->root_dnode) {
-		dno = ad->up = d->up;
+		ad->up = d->up;
+		dno = le32_to_cpu(ad->up);
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
 		hpfs_mark_4buffers_dirty(&qbh1);
 		hpfs_brelse4(&qbh1);
 		goto go_up;
 	}
-	if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) {
+	if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) {
 		hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
 		hpfs_brelse4(&qbh);
 		hpfs_brelse4(&qbh1);
@@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 	i->i_blocks += 4;
 	rd->root_dnode = 1;
 	rd->up = d->up;
-	if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) {
+	if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) {
 		hpfs_free_dnode(i->i_sb, rdno);
 		hpfs_brelse4(&qbh);
 		hpfs_brelse4(&qbh1);
@@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 		kfree(nname);
 		return 1;
 	}
-	fnode->u.external[0].disk_secno = rdno;
+	fnode->u.external[0].disk_secno = cpu_to_le32(rdno);
 	mark_buffer_dirty(bh);
 	brelse(bh);
-	d->up = ad->up = hpfs_i(i)->i_dno = rdno;
+	hpfs_i(i)->i_dno = rdno;
+	d->up = ad->up = cpu_to_le32(rdno);
 	d->root_dnode = ad->root_dnode = 0;
 	hpfs_mark_4buffers_dirty(&qbh);
 	hpfs_brelse4(&qbh);
@@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 
 int hpfs_add_dirent(struct inode *i,
 		    const unsigned char *name, unsigned namelen,
-		    struct hpfs_dirent *new_de, int cdepth)
+		    struct hpfs_dirent *new_de)
 {
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
 	struct dnode *d;
@@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i,
 		}
 	}
 	hpfs_brelse4(&qbh);
-	if (!cdepth) hpfs_lock_creation(i->i_sb);
 	if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) {
 		c = 1;
 		goto ret;
@@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i,
 	i->i_version++;
 	c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
 	ret:
-	if (!cdepth) hpfs_unlock_creation(i->i_sb);
 	return c;
 }
 
@@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
 				return 0;
 		if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0;
 		if (hpfs_sb(i->i_sb)->sb_chk) {
-			if (dnode->up != chk_up) {
+			if (le32_to_cpu(dnode->up) != chk_up) {
 				hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x",
-					dno, chk_up, dnode->up);
+					dno, chk_up, le32_to_cpu(dnode->up));
 				hpfs_brelse4(&qbh);
 				return 0;
 			}
@@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
 		hpfs_brelse4(&qbh);
 	}
 	while (!(de = dnode_pre_last_de(dnode))) {
-		dnode_secno up = dnode->up;
+		dnode_secno up = le32_to_cpu(dnode->up);
 		hpfs_brelse4(&qbh);
 		hpfs_free_dnode(i->i_sb, dno);
 		i->i_size -= 2048;
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
 			hpfs_brelse4(&qbh);
 			return 0;
 		}
-		dnode->first_free -= 4;
-		de->length -= 4;
+		dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
+		de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
 		de->down = 0;
 		hpfs_mark_4buffers_dirty(&qbh);
 		dno = up;
@@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
 	t = get_pos(dnode, de);
 	for_all_poss(i, hpfs_pos_subst, t, 4);
 	for_all_poss(i, hpfs_pos_subst, t + 1, 5);
-	if (!(nde = kmalloc(de->length, GFP_NOFS))) {
+	if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
 		hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted");
 		hpfs_brelse4(&qbh);
 		return 0;
 	}
-	memcpy(nde, de, de->length);
+	memcpy(nde, de, le16_to_cpu(de->length));
 	ddno = de->down ? de_down_pointer(de) : 0;
 	hpfs_delete_de(i->i_sb, dnode, de);
 	set_last_pointer(i->i_sb, dnode, ddno);
@@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 	try_it_again:
 	if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return;
 	if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return;
-	if (dnode->first_free > 56) goto end;
-	if (dnode->first_free == 52 || dnode->first_free == 56) {
+	if (le32_to_cpu(dnode->first_free) > 56) goto end;
+	if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) {
 		struct hpfs_dirent *de_end;
 		int root = dnode->root_dnode;
-		up = dnode->up;
+		up = le32_to_cpu(dnode->up);
 		de = dnode_first_de(dnode);
 		down = de->down ? de_down_pointer(de) : 0;
 		if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) {
@@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 				return;
 			    }
 			if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
-				d1->up = up;
+				d1->up = cpu_to_le32(up);
 				d1->root_dnode = 1;
 				hpfs_mark_4buffers_dirty(&qbh1);
 				hpfs_brelse4(&qbh1);
 			}
 			if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) {
-				fnode->u.external[0].disk_secno = down;
+				fnode->u.external[0].disk_secno = cpu_to_le32(down);
 				mark_buffer_dirty(bh);
 				brelse(bh);
 			}
@@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 		for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
 		if (!down) {
 			de->down = 0;
-			de->length -= 4;
-			dnode->first_free -= 4;
+			de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
+			dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
 			memmove(de_next_de(de), (char *)de_next_de(de) + 4,
-				(char *)dnode + dnode->first_free - (char *)de_next_de(de));
+				(char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
 		} else {
 			struct dnode *d1;
 			struct quad_buffer_head qbh1;
-			*(dnode_secno *) ((void *) de + de->length - 4) = down;
+			*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = cpu_to_le32(down);
 			if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
-				d1->up = up;
+				d1->up = cpu_to_le32(up);
 				hpfs_mark_4buffers_dirty(&qbh1);
 				hpfs_brelse4(&qbh1);
 			}
 		}
 	} else {
-		hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free);
+		hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free));
 		goto end;
 	}
 
@@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 		struct quad_buffer_head qbh1;
 		if (!de_next->down) goto endm;
 		ndown = de_down_pointer(de_next);
-		if (!(de_cp = kmalloc(de->length, GFP_NOFS))) {
+		if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
 			printk("HPFS: out of memory for dtree balancing\n");
 			goto endm;
 		}
-		memcpy(de_cp, de, de->length);
+		memcpy(de_cp, de, le16_to_cpu(de->length));
 		hpfs_delete_de(i->i_sb, dnode, de);
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
 		for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4);
 		for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1);
 		if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) {
-			d1->up = ndown;
+			d1->up = cpu_to_le32(ndown);
 			hpfs_mark_4buffers_dirty(&qbh1);
 			hpfs_brelse4(&qbh1);
 		}
@@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 			struct hpfs_dirent *del = dnode_last_de(d1);
 			dlp = del->down ? de_down_pointer(del) : 0;
 			if (!dlp && down) {
-				if (d1->first_free > 2044) {
+				if (le32_to_cpu(d1->first_free) > 2044) {
 					if (hpfs_sb(i->i_sb)->sb_chk >= 2) {
 						printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
 						printk("HPFS: warning: terminating balancing operation\n");
@@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 					printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
 					printk("HPFS: warning: goin'on\n");
 				}
-				del->length += 4;
+				del->length = cpu_to_le16(le16_to_cpu(del->length) + 4);
 				del->down = 1;
-				d1->first_free += 4;
+				d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4);
 			}
 			if (dlp && !down) {
-				del->length -= 4;
+				del->length = cpu_to_le16(le16_to_cpu(del->length) - 4);
 				del->down = 0;
-				d1->first_free -= 4;
+				d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4);
 			} else if (down)
-				*(dnode_secno *) ((void *) del + del->length - 4) = down;
+				*(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
 		} else goto endm;
-		if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) {
+		if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) {
 			printk("HPFS: out of memory for dtree balancing\n");
 			hpfs_brelse4(&qbh1);
 			goto endm;
 		}
 		hpfs_mark_4buffers_dirty(&qbh1);
 		hpfs_brelse4(&qbh1);
-		memcpy(de_cp, de_prev, de_prev->length);
+		memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
 		hpfs_delete_de(i->i_sb, dnode, de_prev);
 		if (!de_prev->down) {
-			de_prev->length += 4;
+			de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4);
 			de_prev->down = 1;
-			dnode->first_free += 4;
+			dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4);
 		}
-		*(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown;
+		*(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
 		for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4);
 		for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1));
 		if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) {
-			d1->up = ndown;
+			d1->up = cpu_to_le32(ndown);
 			hpfs_mark_4buffers_dirty(&qbh1);
 			hpfs_brelse4(&qbh1);
 		}
@@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
 {
 	struct dnode *dnode = qbh->data;
 	dnode_secno down = 0;
-	int lock = 0;
 	loff_t t;
 	if (de->first || de->last) {
 		hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno);
@@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
 	}
 	if (de->down) down = de_down_pointer(de);
 	if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) {
-		lock = 1;
-		hpfs_lock_creation(i->i_sb);
 		if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) {
 			hpfs_brelse4(qbh);
-			hpfs_unlock_creation(i->i_sb);
 			return 2;
 		}
 	}
@@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
 		dnode_secno a = move_to_top(i, down, dno);
 		for_all_poss(i, hpfs_pos_subst, 5, t);
 		if (a) delete_empty_dnode(i, a);
-		if (lock) hpfs_unlock_creation(i->i_sb);
 		return !a;
 	}
 	delete_empty_dnode(i, dno);
-	if (lock) hpfs_unlock_creation(i->i_sb);
 	return 0;
 }
 
@@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
 	ptr = 0;
 	go_up:
 	if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return;
-	if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno)
-		hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up);
+	if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno)
+		hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up));
 	de = dnode_first_de(dnode);
 	if (ptr) while(1) {
 		if (de->down) if (de_down_pointer(de) == ptr) goto process_de;
@@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
 	if (!de->first && !de->last && n_items) (*n_items)++;
 	if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de;
 	ptr = dno;
-	dno = dnode->up;
+	dno = le32_to_cpu(dnode->up);
 	if (dnode->root_dnode) {
 		hpfs_brelse4(&qbh);
 		return;
@@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno)
 			return d;
 	if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno;
 	if (hpfs_sb(s)->sb_chk)
-		if (up && ((struct dnode *)qbh.data)->up != up)
-			hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up);
+		if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up)
+			hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up));
 	if (!de->down) {
 		hpfs_brelse4(&qbh);
 		return d;
@@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
 	/* Going up */
 	if (dnode->root_dnode) goto bail;
 
-	if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0)))
+	if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0)))
 		goto bail;
 
 	end_up_de = dnode_end_de(up_dnode);
@@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
 	for (up_de = dnode_first_de(up_dnode); up_de < end_up_de;
 	     up_de = de_next_de(up_de)) {
 		if (!(++c & 077)) hpfs_error(inode->i_sb,
-			"map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up);
+			"map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up));
 		if (up_de->down && de_down_pointer(up_de) == dno) {
-			*posp = ((loff_t) dnode->up << 4) + c;
+			*posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c;
 			hpfs_brelse4(&qbh0);
 			return de;
 		}
 	}
 	
 	hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x",
-		dno, dnode->up);
+		dno, le32_to_cpu(dnode->up));
 	hpfs_brelse4(&qbh0);
 	
 	bail:
@@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
 		/*name2[15] = 0xff;*/
 		name1len = 15; name2len = 256;
 	}
-	if (!(upf = hpfs_map_fnode(s, f->up, &bh))) {
+	if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) {
 		kfree(name2);
 		return NULL;
 	}	
 	if (!upf->dirflag) {
 		brelse(bh);
-		hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up);
+		hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up));
 		kfree(name2);
 		return NULL;
 	}
-	dno = upf->u.external[0].disk_secno;
+	dno = le32_to_cpu(upf->u.external[0].disk_secno);
 	brelse(bh);
 	go_down:
 	downd = 0;
@@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
 		return NULL;
 	}
 	next_de:
-	if (de->fnode == fno) {
+	if (le32_to_cpu(de->fnode) == fno) {
 		kfree(name2);
 		return de;
 	}
@@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
 		goto go_down;
 	}
 	f:
-	if (de->fnode == fno) {
+	if (le32_to_cpu(de->fnode) == fno) {
 		kfree(name2);
 		return de;
 	}
@@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
 	if ((de = de_next_de(de)) < de_end) goto next_de;
 	if (d->root_dnode) goto not_found;
 	downd = dno;
-	dno = d->up;
+	dno = le32_to_cpu(d->up);
 	hpfs_brelse4(qbh);
 	if (hpfs_sb(s)->sb_chk)
 		if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 45e53d972b42..d8b84d113c89 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
 		}
 		if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return;
 		if (ea->indirect) {
-			if (ea->valuelen != 8) {
+			if (ea_valuelen(ea) != 8) {
 				hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x",
 					ano ? "anode" : "sectors", a, pos);
 				return;
@@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
 				return;
 			hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
 		}
-		pos += ea->namelen + ea->valuelen + 5;
+		pos += ea->namelen + ea_valuelen(ea) + 5;
 	}
 	if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9);
 	else {
@@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
 	unsigned pos;
 	int ano, len;
 	secno a;
+	char ex[4 + 255 + 1 + 8];
 	struct extended_attribute *ea;
 	struct extended_attribute *ea_end = fnode_end_ea(fnode);
 	for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
 		if (!strcmp(ea->name, key)) {
 			if (ea->indirect)
 				goto indirect;
-			if (ea->valuelen >= size)
+			if (ea_valuelen(ea) >= size)
 				return -EINVAL;
-			memcpy(buf, ea_data(ea), ea->valuelen);
-			buf[ea->valuelen] = 0;
+			memcpy(buf, ea_data(ea), ea_valuelen(ea));
+			buf[ea_valuelen(ea)] = 0;
 			return 0;
 		}
-	a = fnode->ea_secno;
-	len = fnode->ea_size_l;
+	a = le32_to_cpu(fnode->ea_secno);
+	len = le32_to_cpu(fnode->ea_size_l);
 	ano = fnode->ea_anode;
 	pos = 0;
 	while (pos < len) {
-		char ex[4 + 255 + 1 + 8];
 		ea = (struct extended_attribute *)ex;
 		if (pos + 4 > len) {
 			hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x",
@@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
 		if (!strcmp(ea->name, key)) {
 			if (ea->indirect)
 				goto indirect;
-			if (ea->valuelen >= size)
+			if (ea_valuelen(ea) >= size)
 				return -EINVAL;
-			if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf))
+			if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf))
 				return -EIO;
-			buf[ea->valuelen] = 0;
+			buf[ea_valuelen(ea)] = 0;
 			return 0;
 		}
-		pos += ea->namelen + ea->valuelen + 5;
+		pos += ea->namelen + ea_valuelen(ea) + 5;
 	}
 	return -ENOENT;
 indirect:
@@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
 		if (!strcmp(ea->name, key)) {
 			if (ea->indirect)
 				return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
-			if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+			if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
 				printk("HPFS: out of memory for EA\n");
 				return NULL;
 			}
-			memcpy(ret, ea_data(ea), ea->valuelen);
-			ret[ea->valuelen] = 0;
+			memcpy(ret, ea_data(ea), ea_valuelen(ea));
+			ret[ea_valuelen(ea)] = 0;
 			return ret;
 		}
-	a = fnode->ea_secno;
-	len = fnode->ea_size_l;
+	a = le32_to_cpu(fnode->ea_secno);
+	len = le32_to_cpu(fnode->ea_size_l);
 	ano = fnode->ea_anode;
 	pos = 0;
 	while (pos < len) {
@@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
 		if (!strcmp(ea->name, key)) {
 			if (ea->indirect)
 				return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
-			if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+			if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
 				printk("HPFS: out of memory for EA\n");
 				return NULL;
 			}
-			if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) {
+			if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) {
 				kfree(ret);
 				return NULL;
 			}
-			ret[ea->valuelen] = 0;
+			ret[ea_valuelen(ea)] = 0;
 			return ret;
 		}
-		pos += ea->namelen + ea->valuelen + 5;
+		pos += ea->namelen + ea_valuelen(ea) + 5;
 	}
 	return NULL;
 }
@@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 			if (ea->indirect) {
 				if (ea_len(ea) == size)
 					set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
-			} else if (ea->valuelen == size) {
+			} else if (ea_valuelen(ea) == size) {
 				memcpy(ea_data(ea), data, size);
 			}
 			return;
 		}
-	a = fnode->ea_secno;
-	len = fnode->ea_size_l;
+	a = le32_to_cpu(fnode->ea_secno);
+	len = le32_to_cpu(fnode->ea_size_l);
 	ano = fnode->ea_anode;
 	pos = 0;
 	while (pos < len) {
@@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 					set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
 			}
 			else {
-				if (ea->valuelen == size)
+				if (ea_valuelen(ea) == size)
 					hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data);
 			}
 			return;
 		}
-		pos += ea->namelen + ea->valuelen + 5;
+		pos += ea->namelen + ea_valuelen(ea) + 5;
 	}
-	if (!fnode->ea_offs) {
-		/*if (fnode->ea_size_s) {
+	if (!le16_to_cpu(fnode->ea_offs)) {
+		/*if (le16_to_cpu(fnode->ea_size_s)) {
 			hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0",
-				inode->i_ino, fnode->ea_size_s);
+				inode->i_ino, le16_to_cpu(fnode->ea_size_s));
 			return;
 		}*/
-		fnode->ea_offs = 0xc4;
+		fnode->ea_offs = cpu_to_le16(0xc4);
 	}
-	if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) {
+	if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
 		hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
 			(unsigned long)inode->i_ino,
-			fnode->ea_offs, fnode->ea_size_s);
+			le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
 		return;
 	}
-	if ((fnode->ea_size_s || !fnode->ea_size_l) &&
-	     fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) {
+	if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) &&
+	     le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) {
 		ea = fnode_end_ea(fnode);
 		*(char *)ea = 0;
 		ea->namelen = strlen(key);
-		ea->valuelen = size;
+		ea->valuelen_lo = size;
+		ea->valuelen_hi = size >> 8;
 		strcpy(ea->name, key);
 		memcpy(ea_data(ea), data, size);
-		fnode->ea_size_s += strlen(key) + size + 5;
+		fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5);
 		goto ret;
 	}
 	/* Most the code here is 99.9993422% unused. I hope there are no bugs.
 	   But what .. HPFS.IFS has also bugs in ea management. */
-	if (fnode->ea_size_s && !fnode->ea_size_l) {
+	if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) {
 		secno n;
 		struct buffer_head *bh;
 		char *data;
-		if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return;
+		if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return;
 		if (!(data = hpfs_get_sector(s, n, &bh))) {
 			hpfs_free_sectors(s, n, 1);
 			return;
 		}
-		memcpy(data, fnode_ea(fnode), fnode->ea_size_s);
-		fnode->ea_size_l = fnode->ea_size_s;
-		fnode->ea_size_s = 0;
-		fnode->ea_secno = n;
-		fnode->ea_anode = 0;
+		memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s));
+		fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s));
+		fnode->ea_size_s = cpu_to_le16(0);
+		fnode->ea_secno = cpu_to_le32(n);
+		fnode->ea_anode = 0;
 		mark_buffer_dirty(bh);
 		brelse(bh);
 	}
-	pos = fnode->ea_size_l + 5 + strlen(key) + size;
-	len = (fnode->ea_size_l + 511) >> 9;
+	pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size;
+	len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9;
 	if (pos >= 30000) goto bail;
 	while (((pos + 511) >> 9) > len) {
 		if (!len) {
-			if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1)))
-				goto bail;
+			secno q = hpfs_alloc_sector(s, fno, 1, 0);
+			if (!q) goto bail;
+			fnode->ea_secno = cpu_to_le32(q);
 			fnode->ea_anode = 0;
 			len++;
 		} else if (!fnode->ea_anode) {
-			if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) {
+			if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) {
 				len++;
 			} else {
 				/* Aargh... don't know how to create ea anodes :-( */
@@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 				anode_secno a_s;
 				if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh)))
 					goto bail;
-				anode->up = fno;
+				anode->up = cpu_to_le32(fno);
 				anode->btree.fnode_parent = 1;
 				anode->btree.n_free_nodes--;
 				anode->btree.n_used_nodes++;
-				anode->btree.first_free += 12;
-				anode->u.external[0].disk_secno = fnode->ea_secno;
-				anode->u.external[0].file_secno = 0;
-				anode->u.external[0].length = len;
+				anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12);
+				anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno));
+				anode->u.external[0].file_secno = cpu_to_le32(0);
+				anode->u.external[0].length = cpu_to_le32(len);
 				mark_buffer_dirty(bh);
 				brelse(bh);
 				fnode->ea_anode = 1;
-				fnode->ea_secno = a_s;*/
+				fnode->ea_secno = cpu_to_le32(a_s);*/
 				secno new_sec;
 				int i;
-				if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1)))
+				if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9))))
 					goto bail;
 				for (i = 0; i < len; i++) {
 					struct buffer_head *bh1, *bh2;
 					void *b1, *b2;
-					if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) {
+					if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) {
 						hpfs_free_sectors(s, new_sec, (pos + 511) >> 9);
 						goto bail;
 					}
@@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 					mark_buffer_dirty(bh2);
 					brelse(bh2);
 				}
-				hpfs_free_sectors(s, fnode->ea_secno, len);
-				fnode->ea_secno = new_sec;
+				hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len);
+				fnode->ea_secno = cpu_to_le32(new_sec);
 				len = (pos + 511) >> 9;
 			}
 		}
 		if (fnode->ea_anode) {
-			if (hpfs_add_sector_to_btree(s, fnode->ea_secno,
+			if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno),
 						     0, len) != -1) {
 				len++;
 			} else {
@@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
 	h[1] = strlen(key);
 	h[2] = size & 0xff;
 	h[3] = size >> 8;
-	if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail;
-	if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail;
-	if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail;
-	fnode->ea_size_l = pos;
+	if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail;
+	if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail;
+	if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail;
+	fnode->ea_size_l = cpu_to_le32(pos);
 	ret:
 	hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size;
 	return;
 	bail:
-	if (fnode->ea_secno)
-		if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9);
-		else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9));
-	else fnode->ea_secno = fnode->ea_size_l = 0;
+	if (le32_to_cpu(fnode->ea_secno))
+		if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9);
+		else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9));
+	else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0);
 }
 	
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9b9eb6933e43..89c500ee5213 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
 
 int hpfs_file_fsync(struct file *file, int datasync)
 {
-	/*return file_fsync(file, datasync);*/
-	return 0; /* Don't fsync :-) */
+	struct inode *inode = file->f_mapping->host;
+	return sync_blockdev(inode->i_sb->s_bdev);
 }
 
 /*
@@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
 static void hpfs_truncate(struct inode *i)
 {
 	if (IS_IMMUTABLE(i)) return /*-EPERM*/;
-	hpfs_lock(i->i_sb);
+	hpfs_lock_assert(i->i_sb);
+
 	hpfs_i(i)->i_n_secs = 0;
 	i->i_blocks = 1 + ((i->i_size + 511) >> 9);
 	hpfs_i(i)->mmu_private = i->i_size;
 	hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
 	hpfs_write_inode(i);
 	hpfs_i(i)->i_n_secs = 0;
-	hpfs_unlock(i->i_sb);
 }
 
 static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
 {
+	int r;
 	secno s;
+	hpfs_lock(inode->i_sb);
 	s = hpfs_bmap(inode, iblock);
 	if (s) {
 		map_bh(bh_result, inode->i_sb, s);
-		return 0;
+		goto ret_0;
 	}
-	if (!create) return 0;
+	if (!create) goto ret_0;
 	if (iblock<<9 != hpfs_i(inode)->mmu_private) {
 		BUG();
-		return -EIO;
+		r = -EIO;
+		goto ret_r;
 	}
 	if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
 		hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
-		return -ENOSPC;
+		r = -ENOSPC;
+		goto ret_r;
 	}
 	inode->i_blocks++;
 	hpfs_i(inode)->mmu_private += 512;
 	set_buffer_new(bh_result);
 	map_bh(bh_result, inode->i_sb, s);
-	return 0;
+	ret_0:
+	r = 0;
+	ret_r:
+	hpfs_unlock(inode->i_sb);
+	return r;
 }
 
 static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
 	ssize_t retval;
 
 	retval = do_sync_write(file, buf, count, ppos);
-	if (retval > 0)
+	if (retval > 0) {
+		hpfs_lock(file->f_path.dentry->d_sb);
 		hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
+		hpfs_unlock(file->f_path.dentry->d_sb);
+	}
 	return retval;
 }
 
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 0e84c73cd9c4..8b0650aae328 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -19,9 +19,13 @@
    For definitive information on HPFS, ask somebody else -- this is guesswork.
    There are certain to be many mistakes. */
 
+#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
+#error unknown endian
+#endif
+
 /* Notation */
 
-typedef unsigned secno;			/* sector number, partition relative */
+typedef u32 secno;			/* sector number, partition relative */
 
 typedef secno dnode_secno;		/* sector number of a dnode */
 typedef secno fnode_secno;		/* sector number of an fnode */
@@ -38,28 +42,28 @@ typedef u32 time32_t;		/* 32-bit time_t type */
 
 struct hpfs_boot_block
 {
-  unsigned char jmp[3];
-  unsigned char oem_id[8];
-  unsigned char bytes_per_sector[2];	/* 512 */
-  unsigned char sectors_per_cluster;
-  unsigned char n_reserved_sectors[2];
-  unsigned char n_fats;
-  unsigned char n_rootdir_entries[2];
-  unsigned char n_sectors_s[2];
-  unsigned char media_byte;
-  unsigned short sectors_per_fat;
-  unsigned short sectors_per_track;
-  unsigned short heads_per_cyl;
-  unsigned int n_hidden_sectors;
-  unsigned int n_sectors_l;		/* size of partition */
-  unsigned char drive_number;
-  unsigned char mbz;
-  unsigned char sig_28h;		/* 28h */
-  unsigned char vol_serno[4];
-  unsigned char vol_label[11];
-  unsigned char sig_hpfs[8];		/* "HPFS    " */
-  unsigned char pad[448];
-  unsigned short magic;			/* aa55 */
+  u8 jmp[3];
+  u8 oem_id[8];
+  u8 bytes_per_sector[2];	/* 512 */
+  u8 sectors_per_cluster;
+  u8 n_reserved_sectors[2];
+  u8 n_fats;
+  u8 n_rootdir_entries[2];
+  u8 n_sectors_s[2];
+  u8 media_byte;
+  u16 sectors_per_fat;
+  u16 sectors_per_track;
+  u16 heads_per_cyl;
+  u32 n_hidden_sectors;
+  u32 n_sectors_l;		/* size of partition */
+  u8 drive_number;
+  u8 mbz;
+  u8 sig_28h;			/* 28h */
+  u8 vol_serno[4];
+  u8 vol_label[11];
+  u8 sig_hpfs[8];		/* "HPFS    " */
+  u8 pad[448];
+  u16 magic;			/* aa55 */
 };
 
 
@@ -71,31 +75,29 @@ struct hpfs_boot_block
 
 struct hpfs_super_block
 {
-  unsigned magic;			/* f995 e849 */
-  unsigned magic1;			/* fa53 e9c5, more magic? */
-  /*unsigned huh202;*/			/* ?? 202 = N. of B. in 1.00390625 S.*/
-  char version;				/* version of a filesystem  usually 2 */
-  char funcversion;			/* functional version - oldest version
+  u32 magic;				/* f995 e849 */
+  u32 magic1;				/* fa53 e9c5, more magic? */
+  u8 version;				/* version of a filesystem  usually 2 */
+  u8 funcversion;			/* functional version - oldest version
   					   of filesystem that can understand
 					   this disk */
-  unsigned short int zero;		/* 0 */
+  u16 zero;				/* 0 */
   fnode_secno root;			/* fnode of root directory */
   secno n_sectors;			/* size of filesystem */
-  unsigned n_badblocks;			/* number of bad blocks */
+  u32 n_badblocks;			/* number of bad blocks */
   secno bitmaps;			/* pointers to free space bit maps */
-  unsigned zero1;			/* 0 */
+  u32 zero1;				/* 0 */
   secno badblocks;			/* bad block list */
-  unsigned zero3;			/* 0 */
+  u32 zero3;				/* 0 */
   time32_t last_chkdsk;			/* date last checked, 0 if never */
-  /*unsigned zero4;*/			/* 0 */
-  time32_t last_optimize;			/* date last optimized, 0 if never */
+  time32_t last_optimize;		/* date last optimized, 0 if never */
   secno n_dir_band;			/* number of sectors in dir band */
   secno dir_band_start;			/* first sector in dir band */
   secno dir_band_end;			/* last sector in dir band */
   secno dir_band_bitmap;		/* free space map, 1 dnode per bit */
-  char volume_name[32];			/* not used */
+  u8 volume_name[32];			/* not used */
   secno user_id_table;			/* 8 preallocated sectors - user id */
-  unsigned zero6[103];			/* 0 */
+  u32 zero6[103];			/* 0 */
 };
 
 
@@ -107,44 +109,65 @@ struct hpfs_super_block
 
 struct hpfs_spare_block
 {
-  unsigned magic;			/* f991 1849 */
-  unsigned magic1;			/* fa52 29c5, more magic? */
-
-  unsigned dirty: 1;			/* 0 clean, 1 "improperly stopped" */
-  /*unsigned flag1234: 4;*/		/* unknown flags */
-  unsigned sparedir_used: 1;		/* spare dirblks used */
-  unsigned hotfixes_used: 1;		/* hotfixes used */
-  unsigned bad_sector: 1;		/* bad sector, corrupted disk (???) */
-  unsigned bad_bitmap: 1;		/* bad bitmap */
-  unsigned fast: 1;			/* partition was fast formatted */
-  unsigned old_wrote: 1;		/* old version wrote to partion */
-  unsigned old_wrote_1: 1;		/* old version wrote to partion (?) */
-  unsigned install_dasd_limits: 1;	/* HPFS386 flags */
-  unsigned resynch_dasd_limits: 1;
-  unsigned dasd_limits_operational: 1;
-  unsigned multimedia_active: 1;
-  unsigned dce_acls_active: 1;
-  unsigned dasd_limits_dirty: 1;
-  unsigned flag67: 2;
-  unsigned char mm_contlgulty;
-  unsigned char unused;
+  u32 magic;				/* f991 1849 */
+  u32 magic1;				/* fa52 29c5, more magic? */
+
+#ifdef __LITTLE_ENDIAN
+  u8 dirty: 1;				/* 0 clean, 1 "improperly stopped" */
+  u8 sparedir_used: 1;			/* spare dirblks used */
+  u8 hotfixes_used: 1;			/* hotfixes used */
+  u8 bad_sector: 1;			/* bad sector, corrupted disk (???) */
+  u8 bad_bitmap: 1;			/* bad bitmap */
+  u8 fast: 1;				/* partition was fast formatted */
+  u8 old_wrote: 1;			/* old version wrote to partition */
+  u8 old_wrote_1: 1;			/* old version wrote to partition (?) */
+#else
+  u8 old_wrote_1: 1;			/* old version wrote to partition (?) */
+  u8 old_wrote: 1;			/* old version wrote to partition */
+  u8 fast: 1;				/* partition was fast formatted */
+  u8 bad_bitmap: 1;			/* bad bitmap */
+  u8 bad_sector: 1;			/* bad sector, corrupted disk (???) */
+  u8 hotfixes_used: 1;			/* hotfixes used */
+  u8 sparedir_used: 1;			/* spare dirblks used */
+  u8 dirty: 1;				/* 0 clean, 1 "improperly stopped" */
+#endif
+
+#ifdef __LITTLE_ENDIAN
+  u8 install_dasd_limits: 1;		/* HPFS386 flags */
+  u8 resynch_dasd_limits: 1;
+  u8 dasd_limits_operational: 1;
+  u8 multimedia_active: 1;
+  u8 dce_acls_active: 1;
+  u8 dasd_limits_dirty: 1;
+  u8 flag67: 2;
+#else
+  u8 flag67: 2;
+  u8 dasd_limits_dirty: 1;
+  u8 dce_acls_active: 1;
+  u8 multimedia_active: 1;
+  u8 dasd_limits_operational: 1;
+  u8 resynch_dasd_limits: 1;
+  u8 install_dasd_limits: 1;		/* HPFS386 flags */
+#endif
+
+  u8 mm_contlgulty;
+  u8 unused;
 
   secno hotfix_map;			/* info about remapped bad sectors */
-  unsigned n_spares_used;		/* number of hotfixes */
-  unsigned n_spares;			/* number of spares in hotfix map */
-  unsigned n_dnode_spares_free;		/* spare dnodes unused */
-  unsigned n_dnode_spares;		/* length of spare_dnodes[] list,
+  u32 n_spares_used;			/* number of hotfixes */
+  u32 n_spares;				/* number of spares in hotfix map */
+  u32 n_dnode_spares_free;		/* spare dnodes unused */
+  u32 n_dnode_spares;			/* length of spare_dnodes[] list,
 					   follows in this block*/
   secno code_page_dir;			/* code page directory block */
-  unsigned n_code_pages;		/* number of code pages */
-  /*unsigned large_numbers[2];*/	/* ?? */
-  unsigned super_crc;			/* on HPFS386 and LAN Server this is
+  u32 n_code_pages;			/* number of code pages */
+  u32 super_crc;			/* on HPFS386 and LAN Server this is
   					   checksum of superblock, on normal
 					   OS/2 unused */
-  unsigned spare_crc;			/* on HPFS386 checksum of spareblock */
-  unsigned zero1[15];			/* unused */
+  u32 spare_crc;			/* on HPFS386 checksum of spareblock */
+  u32 zero1[15];			/* unused */
   dnode_secno spare_dnodes[100];	/* emergency free dnode list */
-  unsigned zero2[1];			/* room for more? */
+  u32 zero2[1];				/* room for more? */
 };
 
 /* The bad block list is 4 sectors long.  The first word must be zero,
@@ -179,18 +202,18 @@ struct hpfs_spare_block
 
 struct code_page_directory
 {
-  unsigned magic;			/* 4945 21f7 */
-  unsigned n_code_pages;		/* number of pointers following */
-  unsigned zero1[2];
+  u32 magic;				/* 4945 21f7 */
+  u32 n_code_pages;			/* number of pointers following */
+  u32 zero1[2];
   struct {
-    unsigned short ix;			/* index */
-    unsigned short code_page_number;	/* code page number */
-    unsigned bounds;			/* matches corresponding word
+    u16 ix;				/* index */
+    u16 code_page_number;		/* code page number */
+    u32 bounds;				/* matches corresponding word
 					   in data block */
     secno code_page_data;		/* sector number of a code_page_data
 					   containing c.p. array */
-    unsigned short index;		/* index in c.p. array in that sector*/
-    unsigned short unknown;		/* some unknown value; usually 0;
+    u16 index;				/* index in c.p. array in that sector*/
+    u16 unknown;			/* some unknown value; usually 0;
     					   2 in Japanese version */
   } array[31];				/* unknown length */
 };
@@ -201,21 +224,21 @@ struct code_page_directory
 
 struct code_page_data
 {
-  unsigned magic;			/* 8945 21f7 */
-  unsigned n_used;			/* # elements used in c_p_data[] */
-  unsigned bounds[3];			/* looks a bit like
+  u32 magic;				/* 8945 21f7 */
+  u32 n_used;				/* # elements used in c_p_data[] */
+  u32 bounds[3];			/* looks a bit like
 					     (beg1,end1), (beg2,end2)
 					   one byte each */
-  unsigned short offs[3];		/* offsets from start of sector
+  u16 offs[3];				/* offsets from start of sector
 					   to start of c_p_data[ix] */
   struct {
-    unsigned short ix;			/* index */
-    unsigned short code_page_number;	/* code page number */
-    unsigned short unknown;		/* the same as in cp directory */
-    unsigned char map[128];		/* upcase table for chars 80..ff */
-    unsigned short zero2;
+    u16 ix;				/* index */
+    u16 code_page_number;		/* code page number */
+    u16 unknown;			/* the same as in cp directory */
+    u8 map[128];			/* upcase table for chars 80..ff */
+    u16 zero2;
   } code_page[3];
-  unsigned char incognita[78];
+  u8 incognita[78];
 };
 
 
@@ -255,50 +278,84 @@ struct code_page_data
 #define DNODE_MAGIC   0x77e40aae
 
 struct dnode {
-  unsigned magic;			/* 77e4 0aae */
-  unsigned first_free;			/* offset from start of dnode to
+  u32 magic;				/* 77e4 0aae */
+  u32 first_free;			/* offset from start of dnode to
 					   first free dir entry */
-  unsigned root_dnode:1;		/* Is it root dnode? */
-  unsigned increment_me:31;		/* some kind of activity counter?
-					   Neither HPFS.IFS nor CHKDSK cares
+#ifdef __LITTLE_ENDIAN
+  u8 root_dnode: 1;			/* Is it root dnode? */
+  u8 increment_me: 7;			/* some kind of activity counter? */
+					/* Neither HPFS.IFS nor CHKDSK cares
+					   if you change this word */
+#else
+  u8 increment_me: 7;			/* some kind of activity counter? */
+					/* Neither HPFS.IFS nor CHKDSK cares
 					   if you change this word */
+  u8 root_dnode: 1;			/* Is it root dnode? */
+#endif
+  u8 increment_me2[3];
   secno up;				/* (root dnode) directory's fnode
 					   (nonroot) parent dnode */
   dnode_secno self;			/* pointer to this dnode */
-  unsigned char dirent[2028];		/* one or more dirents */
+  u8 dirent[2028];			/* one or more dirents */
 };
 
 struct hpfs_dirent {
-  unsigned short length;		/* offset to next dirent */
-  unsigned first: 1;			/* set on phony ^A^A (".") entry */
-  unsigned has_acl: 1;
-  unsigned down: 1;			/* down pointer present (after name) */
-  unsigned last: 1;			/* set on phony \377 entry */
-  unsigned has_ea: 1;			/* entry has EA */
-  unsigned has_xtd_perm: 1;		/* has extended perm list (???) */
-  unsigned has_explicit_acl: 1;
-  unsigned has_needea: 1;		/* ?? some EA has NEEDEA set
+  u16 length;				/* offset to next dirent */
+
+#ifdef __LITTLE_ENDIAN
+  u8 first: 1;				/* set on phony ^A^A (".") entry */
+  u8 has_acl: 1;
+  u8 down: 1;				/* down pointer present (after name) */
+  u8 last: 1;				/* set on phony \377 entry */
+  u8 has_ea: 1;				/* entry has EA */
+  u8 has_xtd_perm: 1;			/* has extended perm list (???) */
+  u8 has_explicit_acl: 1;
+  u8 has_needea: 1;			/* ?? some EA has NEEDEA set
+					   I have no idea why this is
+					   interesting in a dir entry */
+#else
+  u8 has_needea: 1;			/* ?? some EA has NEEDEA set
 					   I have no idea why this is
 					   interesting in a dir entry */
-  unsigned read_only: 1;		/* dos attrib */
-  unsigned hidden: 1;			/* dos attrib */
-  unsigned system: 1;			/* dos attrib */
-  unsigned flag11: 1;			/* would be volume label dos attrib */
-  unsigned directory: 1;		/* dos attrib */
-  unsigned archive: 1;			/* dos attrib */
-  unsigned not_8x3: 1;			/* name is not 8.3 */
-  unsigned flag15: 1;
+  u8 has_explicit_acl: 1;
+  u8 has_xtd_perm: 1;			/* has extended perm list (???) */
+  u8 has_ea: 1;				/* entry has EA */
+  u8 last: 1;				/* set on phony \377 entry */
+  u8 down: 1;				/* down pointer present (after name) */
+  u8 has_acl: 1;
+  u8 first: 1;				/* set on phony ^A^A (".") entry */
+#endif
+
+#ifdef __LITTLE_ENDIAN
+  u8 read_only: 1;			/* dos attrib */
+  u8 hidden: 1;				/* dos attrib */
+  u8 system: 1;				/* dos attrib */
+  u8 flag11: 1;				/* would be volume label dos attrib */
+  u8 directory: 1;			/* dos attrib */
+  u8 archive: 1;			/* dos attrib */
+  u8 not_8x3: 1;			/* name is not 8.3 */
+  u8 flag15: 1;
+#else
+  u8 flag15: 1;
+  u8 not_8x3: 1;			/* name is not 8.3 */
+  u8 archive: 1;			/* dos attrib */
+  u8 directory: 1;			/* dos attrib */
+  u8 flag11: 1;				/* would be volume label dos attrib */
+  u8 system: 1;				/* dos attrib */
+  u8 hidden: 1;				/* dos attrib */
+  u8 read_only: 1;			/* dos attrib */
+#endif
+
   fnode_secno fnode;			/* fnode giving allocation info */
   time32_t write_date;			/* mtime */
-  unsigned file_size;			/* file length, bytes */
+  u32 file_size;			/* file length, bytes */
   time32_t read_date;			/* atime */
   time32_t creation_date;			/* ctime */
-  unsigned ea_size;			/* total EA length, bytes */
-  unsigned char no_of_acls : 3;		/* number of ACL's */
-  unsigned char reserver : 5;
-  unsigned char ix;			/* code page index (of filename), see
+  u32 ea_size;				/* total EA length, bytes */
+  u8 no_of_acls;			/* number of ACL's (low 3 bits) */
+  u8 ix;				/* code page index (of filename), see
 					   struct code_page_data */
-  unsigned char namelen, name[1];	/* file name */
+  u8 namelen, name[1];			/* file name */
   /* dnode_secno down;	  btree down pointer, if present,
      			  follows name on next word boundary, or maybe it
 			  precedes next dirent, which is on a word boundary. */
@@ -318,38 +375,50 @@ struct hpfs_dirent {
 
 struct bplus_leaf_node
 {
-  unsigned file_secno;			/* first file sector in extent */
-  unsigned length;			/* length, sectors */
+  u32 file_secno;			/* first file sector in extent */
+  u32 length;				/* length, sectors */
   secno disk_secno;			/* first corresponding disk sector */
 };
 
 struct bplus_internal_node
 {
-  unsigned file_secno;			/* subtree maps sectors < this  */
+  u32 file_secno;			/* subtree maps sectors < this  */
   anode_secno down;			/* pointer to subtree */
 };
 
 struct bplus_header
 {
-  unsigned hbff: 1;	/* high bit of first free entry offset */
-  unsigned flag1: 1;
-  unsigned flag2: 1;
-  unsigned flag3: 1;
-  unsigned flag4: 1;
-  unsigned fnode_parent: 1;		/* ? we're pointed to by an fnode,
+#ifdef __LITTLE_ENDIAN
+  u8 hbff: 1;			/* high bit of first free entry offset */
+  u8 flag1234: 4;
+  u8 fnode_parent: 1;			/* ? we're pointed to by an fnode,
 					   the data btree or some ea or the
 					   main ea bootage pointer ea_secno */
 					/* also can get set in fnodes, which
 					   may be a chkdsk glitch or may mean
 					   this bit is irrelevant in fnodes,
 					   or this interpretation is all wet */
-  unsigned binary_search: 1;		/* suggest binary search (unused) */
-  unsigned internal: 1;			/* 1 -> (internal) tree of anodes
+  u8 binary_search: 1;			/* suggest binary search (unused) */
+  u8 internal: 1;			/* 1 -> (internal) tree of anodes
+					   0 -> (leaf) list of extents */
+#else
+  u8 internal: 1;			/* 1 -> (internal) tree of anodes
 					   0 -> (leaf) list of extents */
-  unsigned char fill[3];
-  unsigned char n_free_nodes;		/* free nodes in following array */
-  unsigned char n_used_nodes;		/* used nodes in following array */
-  unsigned short first_free;		/* offset from start of header to
+  u8 binary_search: 1;			/* suggest binary search (unused) */
+  u8 fnode_parent: 1;			/* ? we're pointed to by an fnode,
+					   the data btree or some ea or the
+					   main ea bootage pointer ea_secno */
+					/* also can get set in fnodes, which
+					   may be a chkdsk glitch or may mean
+					   this bit is irrelevant in fnodes,
+					   or this interpretation is all wet */
+  u8 flag1234: 4;
+  u8 hbff: 1;			/* high bit of first free entry offset */
+#endif
+  u8 fill[3];
+  u8 n_free_nodes;			/* free nodes in following array */
+  u8 n_used_nodes;			/* used nodes in following array */
+  u16 first_free;			/* offset from start of header to
 					   first free node in array */
   union {
     struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
@@ -369,37 +438,38 @@ struct bplus_header
 
 struct fnode
 {
-  unsigned magic;			/* f7e4 0aae */
-  unsigned zero1[2];			/* read history */
-  unsigned char len, name[15];		/* true length, truncated name */
+  u32 magic;				/* f7e4 0aae */
+  u32 zero1[2];				/* read history */
+  u8 len, name[15];			/* true length, truncated name */
   fnode_secno up;			/* pointer to file's directory fnode */
-  /*unsigned zero2[3];*/
   secno acl_size_l;
   secno acl_secno;
-  unsigned short acl_size_s;
-  char acl_anode;
-  char zero2;				/* history bit count */
-  unsigned ea_size_l;			/* length of disk-resident ea's */
+  u16 acl_size_s;
+  u8 acl_anode;
+  u8 zero2;				/* history bit count */
+  u32 ea_size_l;			/* length of disk-resident ea's */
   secno ea_secno;			/* first sector of disk-resident ea's*/
-  unsigned short ea_size_s;		/* length of fnode-resident ea's */
-
-  unsigned flag0: 1;
-  unsigned ea_anode: 1;			/* 1 -> ea_secno is an anode */
-  unsigned flag2: 1;
-  unsigned flag3: 1;
-  unsigned flag4: 1;
-  unsigned flag5: 1;
-  unsigned flag6: 1;
-  unsigned flag7: 1;
-  unsigned dirflag: 1;			/* 1 -> directory.  first & only extent
+  u16 ea_size_s;			/* length of fnode-resident ea's */
+
+#ifdef __LITTLE_ENDIAN
+  u8 flag0: 1;
+  u8 ea_anode: 1;			/* 1 -> ea_secno is an anode */
+  u8 flag234567: 6;
+#else
+  u8 flag234567: 6;
+  u8 ea_anode: 1;			/* 1 -> ea_secno is an anode */
+  u8 flag0: 1;
+#endif
+
+#ifdef __LITTLE_ENDIAN
+  u8 dirflag: 1;			/* 1 -> directory.  first & only extent
 					   points to dnode. */
-  unsigned flag9: 1;
-  unsigned flag10: 1;
-  unsigned flag11: 1;
-  unsigned flag12: 1;
-  unsigned flag13: 1;
-  unsigned flag14: 1;
-  unsigned flag15: 1;
+  u8 flag9012345: 7;
+#else
+  u8 flag9012345: 7;
+  u8 dirflag: 1;			/* 1 -> directory.  first & only extent
+					   points to dnode. */
+#endif
 
   struct bplus_header btree;		/* b+ tree, 8 extents or 12 subtrees */
   union {
@@ -407,17 +477,16 @@ struct fnode
     struct bplus_internal_node internal[12];
   } u;
 
-  unsigned file_size;			/* file length, bytes */
-  unsigned n_needea;			/* number of EA's with NEEDEA set */
-  char user_id[16];			/* unused */
-  unsigned short ea_offs;		/* offset from start of fnode
+  u32 file_size;			/* file length, bytes */
+  u32 n_needea;				/* number of EA's with NEEDEA set */
+  u8 user_id[16];			/* unused */
+  u16 ea_offs;				/* offset from start of fnode
 					   to first fnode-resident ea */
-  char dasd_limit_treshhold;
-  char dasd_limit_delta;
-  unsigned dasd_limit;
-  unsigned dasd_usage;
-  /*unsigned zero5[2];*/
-  unsigned char ea[316];		/* zero or more EA's, packed together
+  u8 dasd_limit_treshhold;
+  u8 dasd_limit_delta;
+  u32 dasd_limit;
+  u32 dasd_usage;
+  u8 ea[316];				/* zero or more EA's, packed together
 					   with no alignment padding.
 					   (Do not use this name, get here
 					   via fnode + ea_offs. I think.) */
@@ -430,7 +499,7 @@ struct fnode
 
 struct anode
 {
-  unsigned magic;			/* 37e4 0aae */
+  u32 magic;				/* 37e4 0aae */
   anode_secno self;			/* pointer to this anode */
   secno up;				/* parent anode or fnode */
 
@@ -440,7 +509,7 @@ struct anode
     struct bplus_internal_node internal[60];
   } u;
 
-  unsigned fill[3];			/* unused */
+  u32 fill[3];				/* unused */
 };
 
 
@@ -461,25 +530,31 @@ struct anode
 
 struct extended_attribute
 {
-  unsigned indirect: 1;			/* 1 -> value gives sector number
+#ifdef __LITTLE_ENDIAN
+  u8 indirect: 1;			/* 1 -> value gives sector number
 					   where real value starts */
-  unsigned anode: 1;			/* 1 -> sector is an anode
+  u8 anode: 1;				/* 1 -> sector is an anode
+					   that points to fragmented value */
+  u8 flag23456: 5;
+  u8 needea: 1;				/* required ea */
+#else
+  u8 needea: 1;				/* required ea */
+  u8 flag23456: 5;
+  u8 anode: 1;				/* 1 -> sector is an anode
 					   that points to fragmented value */
-  unsigned flag2: 1;
-  unsigned flag3: 1;
-  unsigned flag4: 1;
-  unsigned flag5: 1;
-  unsigned flag6: 1;
-  unsigned needea: 1;			/* required ea */
-  unsigned char namelen;		/* length of name, bytes */
-  unsigned short valuelen;		/* length of value, bytes */
-  unsigned char name[0];
+  u8 indirect: 1;			/* 1 -> value gives sector number
+					   where real value starts */
+#endif
+  u8 namelen;				/* length of name, bytes */
+  u8 valuelen_lo;			/* length of value, bytes (low byte) */
+  u8 valuelen_hi;			/* length of value, bytes (high byte) */
+  u8 name[0];
   /*
-    unsigned char name[namelen];	ascii attrib name
-    unsigned char nul;			terminating '\0', not counted
-    unsigned char value[valuelen];	value, arbitrary
+    u8 name[namelen];			ascii attrib name
+    u8 nul;				terminating '\0', not counted
+    u8 value[valuelen];			value, arbitrary
       if this.indirect, valuelen is 8 and the value is
-        unsigned length;		real length of value, bytes
+        u32 length;			real length of value, bytes
         secno secno;			sector address where it starts
       if this.anode, the above sector number is the root of an anode tree
         which points to the value.
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c15adbca07ff..dd552f862c8f 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,6 +13,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
+#include <asm/unaligned.h>
 
 #include "hpfs.h"
 
@@ -51,18 +52,16 @@ struct hpfs_inode_info {
 	unsigned i_disk_sec;	/* (files) minimalist cache of alloc info */
 	unsigned i_n_secs;	/* (files) minimalist cache of alloc info */
 	unsigned i_ea_size;	/* size of extended attributes */
-	unsigned i_conv : 2;	/* (files) crlf->newline hackery */
 	unsigned i_ea_mode : 1;	/* file's permission is stored in ea */
 	unsigned i_ea_uid : 1;	/* file's uid is stored in ea */
 	unsigned i_ea_gid : 1;	/* file's gid is stored in ea */
 	unsigned i_dirty : 1;
-	struct mutex i_mutex;
-	struct mutex i_parent_mutex;
 	loff_t **i_rddir_off;
 	struct inode vfs_inode;
 };
 
 struct hpfs_sb_info {
+	struct mutex hpfs_mutex;	/* global hpfs lock */
 	ino_t sb_root;			/* inode number of root dir */
 	unsigned sb_fs_size;		/* file system size, sectors */
 	unsigned sb_bitmaps;		/* sector number of bitmap list */
@@ -74,7 +73,6 @@ struct hpfs_sb_info {
 	uid_t sb_uid;			/* uid from mount options */
 	gid_t sb_gid;			/* gid from mount options */
 	umode_t sb_mode;		/* mode from mount options */
-	unsigned sb_conv : 2;		/* crlf->newline hackery */
 	unsigned sb_eas : 2;		/* eas: 0-ignore, 1-ro, 2-rw */
 	unsigned sb_err : 2;		/* on errs: 0-cont, 1-ro, 2-panic */
 	unsigned sb_chk : 2;		/* checks: 0-no, 1-normal, 2-strict */
@@ -87,20 +85,9 @@ struct hpfs_sb_info {
 	unsigned *sb_bmp_dir;		/* main bitmap directory */
 	unsigned sb_c_bitmap;		/* current bitmap */
 	unsigned sb_max_fwd_alloc;	/* max forwad allocation */
-	struct mutex hpfs_creation_de;	/* when creating dirents, nobody else
-					   can alloc blocks */
-	/*unsigned sb_mounting : 1;*/
 	int sb_timeshift;
 };
 
-/*
- * conv= options
- */
-
-#define CONV_BINARY 0			/* no conversion */
-#define CONV_TEXT 1			/* crlf->newline */
-#define CONV_AUTO 2			/* decide based on file contents */
-
 /* Four 512-byte buffers and the 2k block obtained by concatenating them */
 
 struct quad_buffer_head {
@@ -113,7 +100,7 @@ struct quad_buffer_head {
 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
 {
   CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
-  return *(dnode_secno *) ((void *) de + de->length - 4);
+  return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4));
 }
 
 /* The first dir entry in a dnode */
@@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
 
 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
 {
-  CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free));
-  return (void *) dnode + dnode->first_free;
+  CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
+  return (void *) dnode + le32_to_cpu(dnode->first_free);
 }
 
 /* The dir entry after dir entry de */
 
 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
 {
-  CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length));
-  return (void *) de + de->length;
+  CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
+  return (void *) de + le16_to_cpu(de->length);
 }
 
 static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
 {
-	return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s);
+	return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
 }
 
 static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
 {
-	return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s);
+	return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
+}
+
+static inline unsigned ea_valuelen(struct extended_attribute *ea)
+{
+	return ea->valuelen_lo + 256 * ea->valuelen_hi;	/* 16-bit value length, stored as separate low/high bytes */
+}
 
 static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
 {
-	return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen);
+	return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
 }
 
 static inline secno ea_sec(struct extended_attribute *ea)
 {
-	return *(secno *)((char *)ea + 9 + ea->namelen);
+	return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen)));
 }
 
 static inline secno ea_len(struct extended_attribute *ea)
 {
-	return *(secno *)((char *)ea + 5 + ea->namelen);
+	return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen)));
 }
 
 static inline char *ea_data(struct extended_attribute *ea)
@@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src)
 	dst->not_8x3 = n;
 }
 
-static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
+static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n)
 {
 	int i;
 	if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n;
-	if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1;
+	if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1;
 	for (i = 1; i < n; i++)
-		if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1))
+		if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1))
 			return i + 1;
 	return 0;
 }
@@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
 /* alloc.c */
 
 int hpfs_chk_sectors(struct super_block *, secno, int, char *);
-secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int);
+secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int);
 int hpfs_alloc_if_possible(struct super_block *, secno);
 void hpfs_free_sectors(struct super_block *, secno, unsigned);
 int hpfs_check_free_dnodes(struct super_block *, int);
 void hpfs_free_dnode(struct super_block *, secno);
-struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int);
+struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
 struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
 struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
 
@@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
 
 /* buffer.c */
 
-void hpfs_lock_creation(struct super_block *);
-void hpfs_unlock_creation(struct super_block *);
 void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int);
 void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **);
 void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int);
@@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *);
 struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
 				const unsigned char *, unsigned, secno);
 int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
-		    struct hpfs_dirent *, int);
+		    struct hpfs_dirent *);
 int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
 void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
 dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
@@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
 		       const unsigned char *, unsigned, int);
 int hpfs_is_name_long(const unsigned char *, unsigned);
 void hpfs_adjust_length(const unsigned char *, unsigned *);
-void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
 
 /* namei.c */
 
@@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
 /*
  * Locking:
  *
- * hpfs_lock() is a leftover from the big kernel lock.
- * Right now, these functions are empty and only left
- * for documentation purposes. The file system no longer
- * works on SMP systems, so the lock is not needed
- * any more.
+ * hpfs_lock() locks the whole filesystem. It must be taken
+ * in every method called by the VFS.
  *
- * If someone is interested in making it work again, this
- * would be the place to start by adding a per-superblock
- * mutex and fixing all the bugs and performance issues
- * caused by that.
+ * We don't do any per-file locking anymore; it is hard to
+ * review, and HPFS is not performance-sensitive anyway.
  */
 static inline void hpfs_lock(struct super_block *s)
 {
+	struct hpfs_sb_info *sbi = hpfs_sb(s);	/* HPFS-private info attached to this superblock */
+	mutex_lock(&sbi->hpfs_mutex);		/* take the filesystem-wide lock; pair with hpfs_unlock() */
 }
 
 static inline void hpfs_unlock(struct super_block *s)
 {
+	struct hpfs_sb_info *sbi = hpfs_sb(s);	/* same per-superblock info used by hpfs_lock() */
+	mutex_unlock(&sbi->hpfs_mutex);		/* drop the lock taken by hpfs_lock() */
+}
+
+static inline void hpfs_lock_assert(struct super_block *s)
+{
+	struct hpfs_sb_info *sbi = hpfs_sb(s);
+	WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex));	/* warn if hpfs_mutex is not held; NOTE: checks "locked", not "locked by the caller" */
 }
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 87f1f787e767..338cd8368451 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
 	i->i_uid = hpfs_sb(sb)->sb_uid;
 	i->i_gid = hpfs_sb(sb)->sb_gid;
 	i->i_mode = hpfs_sb(sb)->sb_mode;
-	hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
 	i->i_size = -1;
 	i->i_blocks = -1;
 	
@@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i)
 		i->i_mode |= S_IFDIR;
 		i->i_op = &hpfs_dir_iops;
 		i->i_fop = &hpfs_dir_ops;
-		hpfs_inode->i_parent_dir = fnode->up;
-		hpfs_inode->i_dno = fnode->u.external[0].disk_secno;
+		hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up);
+		hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno);
 		if (hpfs_sb(sb)->sb_chk >= 2) {
 			struct buffer_head *bh0;
 			if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0);
@@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i)
 		i->i_op = &hpfs_file_iops;
 		i->i_fop = &hpfs_file_ops;
 		i->i_nlink = 1;
-		i->i_size = fnode->file_size;
+		i->i_size = le32_to_cpu(fnode->file_size);
 		i->i_blocks = ((i->i_size + 511) >> 9) + 1;
 		i->i_data.a_ops = &hpfs_aops;
 		hpfs_i(i)->mmu_private = i->i_size;
@@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i)
 static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
 {
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
-	/*if (fnode->acl_size_l || fnode->acl_size_s) {
+	/*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
 		   Some unknown structures like ACL may be in fnode,
 		   we'd better not overwrite them
 		hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
@@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i)
 		kfree(hpfs_inode->i_rddir_off);
 		hpfs_inode->i_rddir_off = NULL;
 	}
-	mutex_lock(&hpfs_inode->i_parent_mutex);
 	if (!i->i_nlink) {
-		mutex_unlock(&hpfs_inode->i_parent_mutex);
 		return;
 	}
 	parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i)
 			hpfs_read_inode(parent);
 			unlock_new_inode(parent);
 		}
-		mutex_lock(&hpfs_inode->i_mutex);
 		hpfs_write_inode_nolock(i);
-		mutex_unlock(&hpfs_inode->i_mutex);
 		iput(parent);
-	} else {
-		mark_inode_dirty(i);
 	}
-	mutex_unlock(&hpfs_inode->i_parent_mutex);
 }
 
 void hpfs_write_inode_nolock(struct inode *i)
@@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i)
 		}
 	} else de = NULL;
 	if (S_ISREG(i->i_mode)) {
-		fnode->file_size = i->i_size;
-		if (de) de->file_size = i->i_size;
+		fnode->file_size = cpu_to_le32(i->i_size);
+		if (de) de->file_size = cpu_to_le32(i->i_size);
 	} else if (S_ISDIR(i->i_mode)) {
-		fnode->file_size = 0;
-		if (de) de->file_size = 0;
+		fnode->file_size = cpu_to_le32(0);
+		if (de) de->file_size = cpu_to_le32(0);
 	}
 	hpfs_write_inode_ea(i, fnode);
 	if (de) {
-		de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
-		de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
-		de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+		de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
+		de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
+		de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
 		de->read_only = !(i->i_mode & 0222);
-		de->ea_size = hpfs_inode->i_ea_size;
+		de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
 		hpfs_mark_4buffers_dirty(&qbh);
 		hpfs_brelse4(&qbh);
 	}
 	if (S_ISDIR(i->i_mode)) {
 		if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
-			de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
-			de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
-			de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+			de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
+			de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
+			de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
 			de->read_only = !(i->i_mode & 0222);
-			de->ea_size = /*hpfs_inode->i_ea_size*/0;
-			de->file_size = 0;
+			de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
+			de->file_size = cpu_to_le32(0);
 			hpfs_mark_4buffers_dirty(&qbh);
 			hpfs_brelse4(&qbh);
 		} else
@@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	hpfs_lock(inode->i_sb);
 	if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
 		goto out_unlock;
+	if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
+		goto out_unlock;
+	if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
+		goto out_unlock;
 	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
 		goto out_unlock;
 
@@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	setattr_copy(inode, attr);
-	mark_inode_dirty(inode);
 
 	hpfs_write_inode(inode);
 
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 840d033ecee8..a790821366a7 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
 		hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
 		return NULL;
 	}
-	sec = hpfs_sb(s)->sb_bmp_dir[bmp_block];
+	sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
 	if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
 		hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
 		return NULL;
@@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
 	struct code_page_data *cpd;
 	struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
 	if (!cp) return NULL;
-	if (cp->magic != CP_DIR_MAGIC) {
-		printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic);
+	if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
+		printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic));
 		brelse(bh);
 		return NULL;
 	}
-	if (!cp->n_code_pages) {
+	if (!le32_to_cpu(cp->n_code_pages)) {
 		printk("HPFS: n_code_pages == 0\n");
 		brelse(bh);
 		return NULL;
 	}
-	cpds = cp->array[0].code_page_data;
-	cpi = cp->array[0].index;
+	cpds = le32_to_cpu(cp->array[0].code_page_data);
+	cpi = le16_to_cpu(cp->array[0].index);
 	brelse(bh);
 
 	if (cpi >= 3) {
@@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
 	}
 	
 	if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
-	if ((unsigned)cpd->offs[cpi] > 0x178) {
+	if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
 		printk("HPFS: Code page index out of sector\n");
 		brelse(bh);
 		return NULL;
 	}
-	ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6;
+	ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
 	if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
 		printk("HPFS: out of memory for code page table\n");
 		brelse(bh);
@@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
 		if (hpfs_sb(s)->sb_chk) {
 			struct extended_attribute *ea;
 			struct extended_attribute *ea_end;
-			if (fnode->magic != FNODE_MAGIC) {
+			if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
 				hpfs_error(s, "bad magic on fnode %08lx",
 					(unsigned long)ino);
 				goto bail;
@@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
 					    (unsigned long)ino);
 					goto bail;
 				}
-				if (fnode->btree.first_free !=
+				if (le16_to_cpu(fnode->btree.first_free) !=
 				    8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
 					hpfs_error(s,
 					    "bad first_free pointer in fnode %08lx",
@@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
 					goto bail;
 				}
 			}
-			if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 ||
-			   (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) {
+			if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
+			   le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
 				hpfs_error(s,
 					"bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
 					(unsigned long)ino,
-					fnode->ea_offs, fnode->ea_size_s);
+					le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
 				goto bail;
 			}
 			ea = fnode_ea(fnode);
@@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff
 	if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
 	if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
 		if (hpfs_sb(s)->sb_chk) {
-			if (anode->magic != ANODE_MAGIC || anode->self != ano) {
+			if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
 				hpfs_error(s, "bad magic on anode %08x", ano);
 				goto bail;
 			}
+			if (le32_to_cpu(anode->self) != ano) {
+				hpfs_error(s, "self pointer invalid on anode %08x", ano);
+				goto bail;
+			}
 			if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
 			    (anode->btree.internal ? 60 : 40)) {
 				hpfs_error(s, "bad number of nodes in anode %08x", ano);
 				goto bail;
 			}
-			if (anode->btree.first_free !=
+			if (le16_to_cpu(anode->btree.first_free) !=
 			    8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) {
 				hpfs_error(s, "bad first_free pointer in anode %08x", ano);
 				goto bail;
@@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
 			unsigned p, pp = 0;
 			unsigned char *d = (unsigned char *)dnode;
 			int b = 0;
-			if (dnode->magic != DNODE_MAGIC) {
+			if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
 				hpfs_error(s, "bad magic on dnode %08x", secno);
 				goto bail;
 			}
-			if (dnode->self != secno)
-				hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self);
+			if (le32_to_cpu(dnode->self) != secno)
+				hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
 			/* Check dirents - bad dirents would cause infinite
 			   loops or shooting to memory */
-			if (dnode->first_free > 2048/* || dnode->first_free < 84*/) {
-				hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free);
+			if (le32_to_cpu(dnode->first_free) > 2048) {
+				hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
 				goto bail;
 			}
-			for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) {
+			for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
 				struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
-				if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) {
+				if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
 					hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
 					goto bail;
 				}
-				if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) {
-					if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok;
+				if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
+					if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
 					hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
 					goto bail;
 				}
@@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
 				pp = p;
 				
 			}
-			if (p != dnode->first_free) {
+			if (p != le32_to_cpu(dnode->first_free)) {
 				hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
 				goto bail;
 			}
@@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
 	if (!fnode)
 		return 0;
 
-	dno = fnode->u.external[0].disk_secno;
+	dno = le32_to_cpu(fnode->u.external[0].disk_secno);
 	brelse(bh);
 	return dno;
 }
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index f24736d7a439..9acdf338def0 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,39 +8,6 @@
 
 #include "hpfs_fn.h"
 
-static const char *text_postfix[]={
-".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
-".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
-".RC", ".TEX", ".TXT", ".Y", ""};
-
-static const char *text_prefix[]={
-"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
-"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
-
-void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
-{
-	struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
-	int i;
-	if (hpfs_inode->i_conv != CONV_AUTO) return;
-	for (i = 0; *text_postfix[i]; i++) {
-		int l = strlen(text_postfix[i]);
-		if (l <= len)
-			if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0))
-				goto text;
-	}
-	for (i = 0; *text_prefix[i]; i++) {
-		int l = strlen(text_prefix[i]);
-		if (l <= len)
-			if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0))
-				goto text;
-	}
-	hpfs_inode->i_conv = CONV_BINARY;
-	return;
-	text:
-	hpfs_inode->i_conv = CONV_TEXT;
-	return;
-}
-
 static inline int not_allowed_char(unsigned char c)
 {
 	return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' ||
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d5f8c8a19023..1f05839c27a7 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
 	if (!fnode)
 		goto bail;
-	dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1);
+	dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0);
 	if (!dnode)
 		goto bail1;
 	memset(&dee, 0, sizeof dee);
@@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (!(mode & 0222)) dee.read_only = 1;
 	/*dee.archive = 0;*/
 	dee.hidden = name[0] == '.';
-	dee.fnode = fno;
-	dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+	dee.fnode = cpu_to_le32(fno);
+	dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
 	result = new_inode(dir->i_sb);
 	if (!result)
 		goto bail2;
@@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	result->i_ino = fno;
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
 	hpfs_i(result)->i_dno = dno;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
 	result->i_ctime.tv_nsec = 0; 
 	result->i_mtime.tv_nsec = 0; 
 	result->i_atime.tv_nsec = 0; 
@@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (dee.read_only)
 		result->i_mode &= ~0222;
 
-	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee);
 	if (r == 1)
 		goto bail3;
 	if (r == -1) {
@@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	}
 	fnode->len = len;
 	memcpy(fnode->name, name, len > 15 ? 15 : len);
-	fnode->up = dir->i_ino;
+	fnode->up = cpu_to_le32(dir->i_ino);
 	fnode->dirflag = 1;
 	fnode->btree.n_free_nodes = 7;
 	fnode->btree.n_used_nodes = 1;
-	fnode->btree.first_free = 0x14;
-	fnode->u.external[0].disk_secno = dno;
-	fnode->u.external[0].file_secno = -1;
+	fnode->btree.first_free = cpu_to_le16(0x14);
+	fnode->u.external[0].disk_secno = cpu_to_le32(dno);
+	fnode->u.external[0].file_secno = cpu_to_le32(-1);
 	dnode->root_dnode = 1;
-	dnode->up = fno;
+	dnode->up = cpu_to_le32(fno);
 	de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0);
-	de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds());
+	de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
 	if (!(mode & 0222)) de->read_only = 1;
 	de->first = de->directory = 1;
 	/*de->hidden = de->system = 0;*/
-	de->fnode = fno;
+	de->fnode = cpu_to_le32(fno);
 	mark_buffer_dirty(bh);
 	brelse(bh);
 	hpfs_mark_4buffers_dirty(&qbh0);
@@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		hpfs_write_inode_nolock(result);
 	}
 	d_instantiate(dentry, result);
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	hpfs_unlock(dir->i_sb);
 	return 0;
 bail3:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail2:
 	hpfs_brelse4(&qbh0);
@@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 	if (!(mode & 0222)) dee.read_only = 1;
 	dee.archive = 1;
 	dee.hidden = name[0] == '.';
-	dee.fnode = fno;
-	dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+	dee.fnode = cpu_to_le32(fno);
+	dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
 
 	result = new_inode(dir->i_sb);
 	if (!result)
@@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 	result->i_op = &hpfs_file_iops;
 	result->i_fop = &hpfs_file_ops;
 	result->i_nlink = 1;
-	hpfs_decide_conv(result, name, len);
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
 	result->i_ctime.tv_nsec = 0;
 	result->i_mtime.tv_nsec = 0;
 	result->i_atime.tv_nsec = 0;
@@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 	result->i_data.a_ops = &hpfs_aops;
 	hpfs_i(result)->mmu_private = 0;
 
-	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 	}
 	fnode->len = len;
 	memcpy(fnode->name, name, len > 15 ? 15 : len);
-	fnode->up = dir->i_ino;
+	fnode->up = cpu_to_le32(dir->i_ino);
 	mark_buffer_dirty(bh);
 	brelse(bh);
 
@@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
 		hpfs_write_inode_nolock(result);
 	}
 	d_instantiate(dentry, result);
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	hpfs_unlock(dir->i_sb);
 	return 0;
 
 bail2:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 	if (!(mode & 0222)) dee.read_only = 1;
 	dee.archive = 1;
 	dee.hidden = name[0] == '.';
-	dee.fnode = fno;
-	dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+	dee.fnode = cpu_to_le32(fno);
+	dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
 
 	result = new_inode(dir->i_sb);
 	if (!result)
@@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 	hpfs_init_inode(result);
 	result->i_ino = fno;
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
 	result->i_ctime.tv_nsec = 0;
 	result->i_mtime.tv_nsec = 0;
 	result->i_atime.tv_nsec = 0;
@@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 	result->i_blocks = 1;
 	init_special_inode(result, mode, rdev);
 
-	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
 	}
 	fnode->len = len;
 	memcpy(fnode->name, name, len > 15 ? 15 : len);
-	fnode->up = dir->i_ino;
+	fnode->up = cpu_to_le32(dir->i_ino);
 	mark_buffer_dirty(bh);
 
 	insert_inode_hash(result);
 
 	hpfs_write_inode_nolock(result);
 	d_instantiate(dentry, result);
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	brelse(bh);
 	hpfs_unlock(dir->i_sb);
 	return 0;
 bail2:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 	memset(&dee, 0, sizeof dee);
 	dee.archive = 1;
 	dee.hidden = name[0] == '.';
-	dee.fnode = fno;
-	dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+	dee.fnode = cpu_to_le32(fno);
+	dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
 
 	result = new_inode(dir->i_sb);
 	if (!result)
@@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 	result->i_ino = fno;
 	hpfs_init_inode(result);
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
 	result->i_ctime.tv_nsec = 0;
 	result->i_mtime.tv_nsec = 0;
 	result->i_atime.tv_nsec = 0;
@@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 	result->i_op = &page_symlink_inode_operations;
 	result->i_data.a_ops = &hpfs_symlink_aops;
 
-	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 	}
 	fnode->len = len;
 	memcpy(fnode->name, name, len > 15 ? 15 : len);
-	fnode->up = dir->i_ino;
+	fnode->up = cpu_to_le32(dir->i_ino);
 	hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
 	mark_buffer_dirty(bh);
 	brelse(bh);
@@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
 
 	hpfs_write_inode_nolock(result);
 	d_instantiate(dentry, result);
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	hpfs_unlock(dir->i_sb);
 	return 0;
 bail2:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
 	iput(result);
 bail1:
 	brelse(bh);
@@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
 	struct hpfs_dirent *de;
 	struct inode *inode = dentry->d_inode;
 	dnode_secno dno;
-	fnode_secno fno;
 	int r;
 	int rep = 0;
 	int err;
@@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
 	hpfs_lock(dir->i_sb);
 	hpfs_adjust_length(name, &len);
 again:
-	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
-	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
@@ -397,7 +381,6 @@ again:
 	if (de->directory)
 		goto out1;
 
-	fno = de->fnode;
 	r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
 	switch (r) {
 	case 1:
@@ -410,8 +393,6 @@ again:
 		if (rep++)
 			break;
 
-		mutex_unlock(&hpfs_i(dir)->i_mutex);
-		mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 		dentry_unhash(dentry);
 		if (!d_unhashed(dentry)) {
 			dput(dentry);
@@ -445,8 +426,6 @@ again:
 out1:
 	hpfs_brelse4(&qbh);
 out:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
-	mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 	hpfs_unlock(dir->i_sb);
 	return err;
 }
@@ -459,15 +438,12 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct hpfs_dirent *de;
 	struct inode *inode = dentry->d_inode;
 	dnode_secno dno;
-	fnode_secno fno;
 	int n_items = 0;
 	int err;
 	int r;
 
 	hpfs_adjust_length(name, &len);
 	hpfs_lock(dir->i_sb);
-	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
-	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
@@ -486,7 +462,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (n_items)
 		goto out1;
 
-	fno = de->fnode;
 	r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
 	switch (r) {
 	case 1:
@@ -505,8 +480,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 out1:
 	hpfs_brelse4(&qbh);
 out:
-	mutex_unlock(&hpfs_i(dir)->i_mutex);
-	mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
 	hpfs_unlock(dir->i_sb);
 	return err;
 }
@@ -568,12 +541,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	hpfs_lock(i->i_sb);
 	/* order doesn't matter, due to VFS exclusion */
-	mutex_lock(&hpfs_i(i)->i_parent_mutex);
-	if (new_inode)
-		mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
-	mutex_lock(&hpfs_i(old_dir)->i_mutex);
-	if (new_dir != old_dir)
-		mutex_lock(&hpfs_i(new_dir)->i_mutex);
 	
 	/* Erm? Moving over the empty non-busy directory is perfectly legal */
 	if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -610,9 +577,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (new_dir == old_dir) hpfs_brelse4(&qbh);
 
-	hpfs_lock_creation(i->i_sb);
-	if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) {
-		hpfs_unlock_creation(i->i_sb);
+	if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) {
 		if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!");
 		err = r == 1 ? -ENOSPC : -EFSERROR;
 		if (new_dir != old_dir) hpfs_brelse4(&qbh);
@@ -621,20 +586,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	
 	if (new_dir == old_dir)
 		if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
-			hpfs_unlock_creation(i->i_sb);
 			hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
 			err = -ENOENT;
 			goto end1;
 		}
 
 	if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) {
-		hpfs_unlock_creation(i->i_sb);
 		hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent");
 		err = r == 2 ? -ENOSPC : -EFSERROR;
 		goto end1;
 	}
-	hpfs_unlock_creation(i->i_sb);
-	
+
 	end:
 	hpfs_i(i)->i_parent_dir = new_dir->i_ino;
 	if (S_ISDIR(i->i_mode)) {
@@ -642,22 +604,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		drop_nlink(old_dir);
 	}
 	if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) {
-		fnode->up = new_dir->i_ino;
+		fnode->up = cpu_to_le32(new_dir->i_ino);
 		fnode->len = new_len;
 		memcpy(fnode->name, new_name, new_len>15?15:new_len);
 		if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len);
 		mark_buffer_dirty(bh);
 		brelse(bh);
 	}
-	hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
-	hpfs_decide_conv(i, new_name, new_len);
 end1:
-	if (old_dir != new_dir)
-		mutex_unlock(&hpfs_i(new_dir)->i_mutex);
-	mutex_unlock(&hpfs_i(old_dir)->i_mutex);
-	mutex_unlock(&hpfs_i(i)->i_parent_mutex);
-	if (new_inode)
-		mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
 	hpfs_unlock(i->i_sb);
 	return err;
 }
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c89b40808587..98580a3b5005 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -18,15 +18,16 @@
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
-static void mark_dirty(struct super_block *s)
+static void mark_dirty(struct super_block *s, int remount)
 {
-	if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) {
+	if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) {
 		struct buffer_head *bh;
 		struct hpfs_spare_block *sb;
 		if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
 			sb->dirty = 1;
 			sb->old_wrote = 0;
 			mark_buffer_dirty(bh);
+			sync_dirty_buffer(bh);
 			brelse(bh);
 		}
 	}
@@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s)
 	struct buffer_head *bh;
 	struct hpfs_spare_block *sb;
 	if (s->s_flags & MS_RDONLY) return;
+	sync_blockdev(s->s_bdev);
 	if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
 		sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error;
 		sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error;
 		mark_buffer_dirty(bh);
+		sync_dirty_buffer(bh);
 		brelse(bh);
 	}
 }
@@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
 	if (!hpfs_sb(s)->sb_was_error) {
 		if (hpfs_sb(s)->sb_err == 2) {
 			printk("; crashing the system because you wanted it\n");
-			mark_dirty(s);
+			mark_dirty(s, 0);
 			panic("HPFS panic");
 		} else if (hpfs_sb(s)->sb_err == 1) {
 			if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n");
 			else {
 				printk("; remounting read-only\n");
-				mark_dirty(s);
+				mark_dirty(s, 0);
 				s->s_flags |= MS_RDONLY;
 			}
 		} else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
@@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s)
 {
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 
+	hpfs_lock(s);
+	unmark_dirty(s);
+	hpfs_unlock(s);
+
 	kfree(sbi->sb_cp_table);
 	kfree(sbi->sb_bmp_dir);
-	unmark_dirty(s);
 	s->s_fs_info = NULL;
 	kfree(sbi);
 }
@@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s)
 	n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
 	count = 0;
 	for (n = 0; n < n_bands; n++)
-		count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]);
+		count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
 	return count;
 }
 
@@ -188,8 +194,6 @@ static void init_once(void *foo)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
 
-	mutex_init(&ei->i_mutex);
-	mutex_init(&ei->i_parent_mutex);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -218,7 +222,6 @@ static void destroy_inodecache(void)
 
 enum {
 	Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
-	Opt_conv_binary, Opt_conv_text, Opt_conv_auto,
 	Opt_check_none, Opt_check_normal, Opt_check_strict,
 	Opt_err_cont, Opt_err_ro, Opt_err_panic,
 	Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
@@ -233,9 +236,6 @@ static const match_table_t tokens = {
 	{Opt_umask, "umask=%o"},
 	{Opt_case_lower, "case=lower"},
 	{Opt_case_asis, "case=asis"},
-	{Opt_conv_binary, "conv=binary"},
-	{Opt_conv_text, "conv=text"},
-	{Opt_conv_auto, "conv=auto"},
 	{Opt_check_none, "check=none"},
 	{Opt_check_normal, "check=normal"},
 	{Opt_check_strict, "check=strict"},
@@ -253,7 +253,7 @@ static const match_table_t tokens = {
 };
 
 static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
-		      int *lowercase, int *conv, int *eas, int *chk, int *errs,
+		      int *lowercase, int *eas, int *chk, int *errs,
 		      int *chkdsk, int *timeshift)
 {
 	char *p;
@@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
 		case Opt_case_asis:
 			*lowercase = 0;
 			break;
-		case Opt_conv_binary:
-			*conv = CONV_BINARY;
-			break;
-		case Opt_conv_text:
-			*conv = CONV_TEXT;
-			break;
-		case Opt_conv_auto:
-			*conv = CONV_AUTO;
-			break;
 		case Opt_check_none:
 			*chk = 0;
 			break;
@@ -370,9 +361,6 @@ HPFS filesystem options:\n\
       umask=xxx         set mode of files that don't have mode specified in eas\n\
       case=lower        lowercase all files\n\
       case=asis         do not lowercase files (default)\n\
-      conv=binary       do not convert CR/LF -> LF (default)\n\
-      conv=auto         convert only files with known text extensions\n\
-      conv=text         convert all files\n\
       check=none        no fs checks - kernel may crash on corrupted filesystem\n\
       check=normal      do some checks - it should not crash (default)\n\
       check=strict      do extra time-consuming checks, used for debugging\n\
@@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	uid_t uid;
 	gid_t gid;
 	umode_t umask;
-	int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+	int lowercase, eas, chk, errs, chkdsk, timeshift;
 	int o;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	char *new_opts = kstrdup(data, GFP_KERNEL);
@@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	lock_super(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
-	lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
+	lowercase = sbi->sb_lowercase;
 	eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
 	errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
 
-	if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv,
+	if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
 	    &eas, &chk, &errs, &chkdsk, &timeshift))) {
 		printk("HPFS: bad mount options.\n");
 		goto out_err;
@@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 
 	sbi->sb_uid = uid; sbi->sb_gid = gid;
 	sbi->sb_mode = 0777 & ~umask;
-	sbi->sb_lowercase = lowercase; sbi->sb_conv = conv;
+	sbi->sb_lowercase = lowercase;
 	sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
 	sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
 
-	if (!(*flags & MS_RDONLY)) mark_dirty(s);
+	if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
 
 	replace_mount_options(s, new_opts);
 
@@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	uid_t uid;
 	gid_t gid;
 	umode_t umask;
-	int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+	int lowercase, eas, chk, errs, chkdsk, timeshift;
 
 	dnode_secno root_dno;
 	struct hpfs_dirent *de = NULL;
@@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 
 	int o;
 
-	if (num_possible_cpus() > 1) {
-		printk(KERN_ERR "HPFS is not SMP safe\n");
-		return -EINVAL;
-	}
-
 	save_mount_options(s, options);
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
@@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	sbi->sb_bmp_dir = NULL;
 	sbi->sb_cp_table = NULL;
 
-	mutex_init(&sbi->hpfs_creation_de);
+	mutex_init(&sbi->hpfs_mutex);
+	hpfs_lock(s);
 
 	uid = current_uid();
 	gid = current_gid();
 	umask = current_umask();
 	lowercase = 0;
-	conv = CONV_BINARY;
 	eas = 2;
 	chk = 1;
 	errs = 1;
 	chkdsk = 1;
 	timeshift = 0;
 
-	if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv,
+	if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
 	    &eas, &chk, &errs, &chkdsk, &timeshift))) {
 		printk("HPFS: bad mount options.\n");
 		goto bail0;
@@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3;
 
 	/* Check magics */
-	if (/*bootblock->magic != BB_MAGIC
-	    ||*/ superblock->magic != SB_MAGIC
-	    || spareblock->magic != SP_MAGIC) {
+	if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC
+	    ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC
+	    || le32_to_cpu(spareblock->magic) != SP_MAGIC) {
 		if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n");
 		goto bail4;
 	}
@@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	s->s_op = &hpfs_sops;
 	s->s_d_op = &hpfs_dentry_operations;
 
-	sbi->sb_root = superblock->root;
-	sbi->sb_fs_size = superblock->n_sectors;
-	sbi->sb_bitmaps = superblock->bitmaps;
-	sbi->sb_dirband_start = superblock->dir_band_start;
-	sbi->sb_dirband_size = superblock->n_dir_band;
-	sbi->sb_dmap = superblock->dir_band_bitmap;
+	sbi->sb_root = le32_to_cpu(superblock->root);
+	sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors);
+	sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps);
+	sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
+	sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
+	sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
 	sbi->sb_uid = uid;
 	sbi->sb_gid = gid;
 	sbi->sb_mode = 0777 & ~umask;
 	sbi->sb_n_free = -1;
 	sbi->sb_n_free_dnodes = -1;
 	sbi->sb_lowercase = lowercase;
-	sbi->sb_conv = conv;
 	sbi->sb_eas = eas;
 	sbi->sb_chk = chk;
 	sbi->sb_chkdsk = chkdsk;
@@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	sbi->sb_max_fwd_alloc = 0xffffff;
 	
 	/* Load bitmap directory */
-	if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps)))
+	if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
 		goto bail4;
 	
 	/* Check for general fs errors*/
@@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 		mark_buffer_dirty(bh2);
 	}
 
-	if (spareblock->hotfixes_used || spareblock->n_spares_used) {
+	if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) {
 		if (errs >= 2) {
 			printk("HPFS: Hotfixes not supported here, try chkdsk\n");
-			mark_dirty(s);
+			mark_dirty(s, 0);
 			goto bail4;
 		}
 		hpfs_error(s, "hotfixes not supported here, try chkdsk");
 		if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n");
 		else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n");
 	}
-	if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) {
+	if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
 		if (errs >= 2) {
 			printk("HPFS: Spare dnodes used, try chkdsk\n");
-			mark_dirty(s);
+			mark_dirty(s, 0);
 			goto bail4;
 		}
 		hpfs_error(s, "warning: spare dnodes used, try chkdsk");
@@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	}
 	if (chk) {
 		unsigned a;
-		if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band ||
-		    superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) {
+		if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
+		    le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
 			hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x",
-				superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band);
+				le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band));
 			goto bail4;
 		}
 		a = sbi->sb_dirband_size;
 		sbi->sb_dirband_size = 0;
-		if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") ||
-		    hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") ||
-		    hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) {
-			mark_dirty(s);
+		if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") ||
+		    hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") ||
+		    hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) {
+			mark_dirty(s, 0);
 			goto bail4;
 		}
 		sbi->sb_dirband_size = a;
 	} else printk("HPFS: You really don't want any checks? You are crazy...\n");
 
 	/* Load code page table */
-	if (spareblock->n_code_pages)
-		if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir)))
+	if (le32_to_cpu(spareblock->n_code_pages))
+		if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir))))
 			printk("HPFS: Warning: code page support is disabled\n");
 
 	brelse(bh2);
@@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	if (!de)
 		hpfs_error(s, "unable to find root dir");
 	else {
-		root->i_atime.tv_sec = local_to_gmt(s, de->read_date);
+		root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date));
 		root->i_atime.tv_nsec = 0;
-		root->i_mtime.tv_sec = local_to_gmt(s, de->write_date);
+		root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
 		root->i_mtime.tv_nsec = 0;
-		root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date);
+		root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
 		root->i_ctime.tv_nsec = 0;
-		hpfs_i(root)->i_ea_size = de->ea_size;
+		hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size);
 		hpfs_i(root)->i_parent_dir = root->i_ino;
 		if (root->i_size == -1)
 			root->i_size = 2048;
@@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 			root->i_blocks = 5;
 		hpfs_brelse4(&qbh);
 	}
+	hpfs_unlock(s);
 	return 0;
 
 bail4:	brelse(bh2);
@@ -681,6 +664,7 @@ bail3:	brelse(bh1);
 bail2:	brelse(bh0);
 bail1:
 bail0:
+	hpfs_unlock(s);
 	kfree(sbi->sb_bmp_dir);
 	kfree(sbi->sb_cp_table);
 	s->s_fs_info = NULL;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 20af62f4304b..6e28000a4b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
 	int ret;
 	struct timespec now = current_kernel_time();
 
+	*cbh = NULL;
+
 	if (is_journal_aborted(journal))
 		return 0;
 
@@ -806,7 +808,7 @@ wait_for_iobuf:
 		if (err)
 			__jbd2_journal_abort_hard(journal);
 	}
-	if (!err && !is_journal_aborted(journal))
+	if (cbh)
 		err = journal_wait_on_commit_record(journal, cbh);
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index aba8ebaec25c..e0ec3db1c395 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
 	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
 	if (!new_dev)
 		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+	bd = bdget(device);
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
 			kfree(new_dev);
+			bdput(bd);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
@@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	devcache[i] = new_dev;
 	devcache[i]->device = device;
-	bd = bdget(device);
 	if (bd) {
 		bdevname(bd, devcache[i]->devname);
 		bdput(bd);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/gfp.h>
+#include <linux/prefetch.h>
 
 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
 
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
 			!read_only)
 		return -EIO;
 
-	mutex_init(&super->s_dirop_mutex);
-	mutex_init(&super->s_object_alias_mutex);
-	INIT_LIST_HEAD(&super->s_freeing_list);
-
 	ret = logfs_init_rw(sb);
 	if (ret)
 		return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
 	if (!super)
 		return ERR_PTR(-ENOMEM);
 
+	mutex_init(&super->s_dirop_mutex);
+	mutex_init(&super->s_object_alias_mutex);
+	INIT_LIST_HEAD(&super->s_freeing_list);
+
 	if (!devname)
 		err = logfs_get_sb_bdev(super, type, devname);
 	else if (strncmp(devname, "mtd", 3))
diff --git a/fs/namei.c b/fs/namei.c
index e6cd6113872c..e3c4f112ebf7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
 static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
 		int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
 {
-	umode_t			mode = inode->i_mode;
+	unsigned int mode = inode->i_mode;
 
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 
@@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
 		do {
 			seq = read_seqcount_begin(&fs->seq);
 			nd->root = fs->root;
+			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
 		} while (read_seqcount_retry(&fs->seq, seq));
 	}
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 7dba2ed03429..d99bcf59e4c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
-static int uuid_is_nil(u8 *uuid)
-{
-	int i;
-	u8  *cp = (u8 *)uuid;
-
-	for (i = 0; i < 16; i++) {
-		if (*cp++)
-			return 0;
-	}
-	return 1;
-}
-
 static int show_mountinfo(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
@@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (IS_MNT_UNBINDABLE(mnt))
 		seq_puts(m, " unbindable");
 
-	if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
-		/* print the uuid */
-		seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
-
 	/* Filesystem specific data */
 	seq_puts(m, " - ");
 	show_type(m, sb);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 89fc160fd5b0..1f063bacd285 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
 }
 
 #ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode)
+static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
 {
 	struct gss_api_mech *mech;
 	struct xdr_netobj oid;
@@ -166,7 +166,7 @@ static int nfs_negotiate_security(const struct dentry *parent,
 		}
 		flavors = page_address(page);
 		ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
-		*flavor = nfs_find_best_sec(flavors, dentry->d_inode);
+		*flavor = nfs_find_best_sec(flavors);
 		put_page(page);
 	}
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1c261ddd65d..c4a69833dd0d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
 	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
+	NFS4CLNT_LEASE_CONFIRM,
 };
 
 enum nfs4_session_state {
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 6f8192f4cfc7..be79dc9f386d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -EKEYEXPIRED:
 		rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
 		break;
+	case -NFS4ERR_RETRY_UNCACHED_REP:
+		break;
 	default:
 		dprintk("%s DS error. Retry through MDS %d\n", __func__,
 			task->tk_status);
@@ -416,7 +418,8 @@ static int
 filelayout_check_layout(struct pnfs_layout_hdr *lo,
 			struct nfs4_filelayout_segment *fl,
 			struct nfs4_layoutget_res *lgr,
-			struct nfs4_deviceid *id)
+			struct nfs4_deviceid *id,
+			gfp_t gfp_flags)
 {
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
@@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 	/* find and reference the deviceid */
 	dsaddr = nfs4_fl_find_get_deviceid(id);
 	if (dsaddr == NULL) {
-		dsaddr = get_device_info(lo->plh_inode, id);
+		dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
 		if (dsaddr == NULL)
 			goto out;
 	}
@@ -500,7 +503,8 @@ static int
 filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 			 struct nfs4_filelayout_segment *fl,
 			 struct nfs4_layoutget_res *lgr,
-			 struct nfs4_deviceid *id)
+			 struct nfs4_deviceid *id,
+			 gfp_t gfp_flags)
 {
 	struct xdr_stream stream;
 	struct xdr_buf buf = {
@@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 
 	dprintk("%s: set_layout_map Begin\n", __func__);
 
-	scratch = alloc_page(GFP_KERNEL);
+	scratch = alloc_page(gfp_flags);
 	if (!scratch)
 		return -ENOMEM;
 
@@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 		goto out_err;
 
 	fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
-			       GFP_KERNEL);
+			       gfp_flags);
 	if (!fl->fh_array)
 		goto out_err;
 
 	for (i = 0; i < fl->num_fh; i++) {
 		/* Do we want to use a mempool here? */
-		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
+		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
 		if (!fl->fh_array[i])
 			goto out_err_free;
 
@@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 
 static struct pnfs_layout_segment *
 filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
-		      struct nfs4_layoutget_res *lgr)
+		      struct nfs4_layoutget_res *lgr,
+		      gfp_t gfp_flags)
 {
 	struct nfs4_filelayout_segment *fl;
 	int rc;
 	struct nfs4_deviceid id;
 
 	dprintk("--> %s\n", __func__);
-	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+	fl = kzalloc(sizeof(*fl), gfp_flags);
 	if (!fl)
 		return NULL;
 
-	rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
-	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
+	rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
+	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
 		_filelayout_free_lseg(fl);
 		return NULL;
 	}
@@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 		int size = (fl->stripe_type == STRIPE_SPARSE) ?
 			fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
 
-		fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
+		fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
 		if (!fl->commit_buckets) {
 			filelayout_free_lseg(&fl->generic_hdr);
 			return NULL;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 7c44579f5832..2b461d77b43a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr *
 nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
 extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
 
 #endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index de5350f2b249..db07c7af1395 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 }
 
 static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
+nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
 {
 	struct nfs4_pnfs_ds *tmp_ds, *ds;
 
-	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
+	ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
 	if (!ds)
 		goto out;
 
@@ -261,7 +261,7 @@ out:
  * Currently only support ipv4, and one multi-path address.
  */
 static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
+decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
 {
 	struct nfs4_pnfs_ds *ds = NULL;
 	char *buf;
@@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
 			rlen);
 		goto out_err;
 	}
-	buf = kmalloc(rlen + 1, GFP_KERNEL);
+	buf = kmalloc(rlen + 1, gfp_flags);
 	if (!buf) {
 		dprintk("%s: Not enough memory\n", __func__);
 		goto out_err;
@@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
 	sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
 	port = htons((tmp[0] << 8) | (tmp[1]));
 
-	ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
+	ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
 	dprintk("%s: Decoded address and port %s\n", __func__, buf);
 out_free:
 	kfree(buf);
@@ -343,7 +343,7 @@ out_err:
 
 /* Decode opaque device data and return the result */
 static struct nfs4_file_layout_dsaddr*
-decode_device(struct inode *ino, struct pnfs_device *pdev)
+decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 {
 	int i;
 	u32 cnt, num;
@@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 	struct page *scratch;
 
 	/* set up xdr stream */
-	scratch = alloc_page(GFP_KERNEL);
+	scratch = alloc_page(gfp_flags);
 	if (!scratch)
 		goto out_err;
 
@@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 	}
 
 	/* read stripe indices */
-	stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
+	stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
 	if (!stripe_indices)
 		goto out_err_free_scratch;
 
@@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 
 	dsaddr = kzalloc(sizeof(*dsaddr) +
 			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
-			GFP_KERNEL);
+			gfp_flags);
 	if (!dsaddr)
 		goto out_err_free_stripe_indices;
 
@@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 		for (j = 0; j < mp_count; j++) {
 			if (j == 0) {
 				dsaddr->ds_list[i] = decode_and_add_ds(&stream,
-					ino);
+					ino, gfp_flags);
 				if (dsaddr->ds_list[i] == NULL)
 					goto out_err_free_deviceid;
 			} else {
@@ -503,12 +503,12 @@ out_err:
  * available devices.
  */
 static struct nfs4_file_layout_dsaddr *
-decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
+decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
 {
 	struct nfs4_file_layout_dsaddr *d, *new;
 	long hash;
 
-	new = decode_device(inode, dev);
+	new = decode_device(inode, dev, gfp_flags);
 	if (!new) {
 		printk(KERN_WARNING "%s: Could not decode or add device\n",
 			__func__);
@@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
  * of available devices, and return it.
  */
 struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
 {
 	struct pnfs_device *pdev = NULL;
 	u32 max_resp_sz;
@@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
 	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
 		__func__, inode, max_resp_sz, max_pages);
 
-	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
+	pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
 	if (pdev == NULL)
 		return NULL;
 
-	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+	pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
 	if (pages == NULL) {
 		kfree(pdev);
 		return NULL;
 	}
 	for (i = 0; i < max_pages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL);
+		pages[i] = alloc_page(gfp_flags);
 		if (!pages[i])
 			goto out_free;
 	}
@@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
 	 * Found new device, need to decode it and then add it to the
 	 * list of known devices for this mountpoint.
 	 */
-	dsaddr = decode_and_add_device(inode, pdev);
+	dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
 out_free:
 	for (i = 0; i < max_pages; i++)
 		__free_page(pages[i]);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9bf41eab3e46..cf1b339c3937 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -46,6 +46,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
+#include <linux/nfs_mount.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
@@ -299,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 			ret = nfs4_delay(server->client, &exception->timeout);
 			if (ret != 0)
 				break;
+		case -NFS4ERR_RETRY_UNCACHED_REP:
 		case -NFS4ERR_OLD_STATEID:
 			exception->retry = 1;
 			break;
@@ -443,8 +445,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 	if (res->sr_status == 1)
 		res->sr_status = NFS_OK;
 
-	/* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
-	if (!res->sr_slot)
+	/* don't increment the sequence number if the task wasn't sent */
+	if (!RPC_WAS_SENT(task))
 		goto out;
 
 	/* Check the SEQUENCE operation status */
@@ -2185,9 +2187,14 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_lookup_root(server, fhandle, info),
-				&exception);
+		err = _nfs4_lookup_root(server, fhandle, info);
+		switch (err) {
+		case 0:
+		case -NFS4ERR_WRONGSEC:
+			break;
+		default:
+			err = nfs4_handle_exception(server, err, &exception);
+		}
 	} while (exception.retry);
 	return err;
 }
@@ -2208,25 +2215,47 @@ out:
 	return ret;
 }
 
-/*
- * get the file handle for the "/" directory on the server
- */
-static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 			      struct nfs_fsinfo *info)
 {
 	int i, len, status = 0;
-	rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2];
+	rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
 
-	flav_array[0] = RPC_AUTH_UNIX;
-	len = gss_mech_list_pseudoflavors(&flav_array[1]);
-	flav_array[1+len] = RPC_AUTH_NULL;
-	len += 2;
+	len = gss_mech_list_pseudoflavors(&flav_array[0]);
+	flav_array[len] = RPC_AUTH_NULL;
+	len += 1;
 
 	for (i = 0; i < len; i++) {
 		status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
-		if (status != -EPERM)
-			break;
+		if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+			continue;
+		break;
 	}
+	/*
+	 * -EACCES could mean that the user doesn't have correct permissions
+	 * to access the mount.  It could also mean that we tried to mount
+	 * with a gss auth flavor, but rpc.gssd isn't running.  Either way,
+	 * existing mount programs don't handle -EACCES very well so it should
+	 * be mapped to -EPERM instead.
+	 */
+	if (status == -EACCES)
+		status = -EPERM;
+	return status;
+}
+
+/*
+ * get the file handle for the "/" directory on the server
+ */
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+			      struct nfs_fsinfo *info)
+{
+	int status = nfs4_lookup_root(server, fhandle, info);
+	if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
+		/*
+		 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
+		 * by nfs4_map_errors() as this function exits.
+		 */
+		status = nfs4_find_root_sec(server, fhandle, info);
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
@@ -3667,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
+		case -NFS4ERR_RETRY_UNCACHED_REP:
 		case -NFS4ERR_OLD_STATEID:
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -3723,21 +3753,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 				sizeof(setclientid.sc_uaddr), "%s.%u.%u",
 				clp->cl_ipaddr, port >> 8, port & 255);
 
-		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+		status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 		if (status != -NFS4ERR_CLID_INUSE)
 			break;
-		if (signalled())
+		if (loop != 0) {
+			++clp->cl_id_uniquifier;
 			break;
-		if (loop++ & 1)
-			ssleep(clp->cl_lease_time / HZ + 1);
-		else
-			if (++clp->cl_id_uniquifier == 0)
-				break;
+		}
+		++loop;
+		ssleep(clp->cl_lease_time / HZ + 1);
 	}
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
 		struct nfs4_setclientid_res *arg,
 		struct rpc_cred *cred)
 {
@@ -3752,7 +3781,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
 	int status;
 
 	now = jiffies;
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	if (status == 0) {
 		spin_lock(&clp->cl_lock);
 		clp->cl_lease_time = fsinfo.lease_time * HZ;
@@ -3762,26 +3791,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
-		struct nfs4_setclientid_res *arg,
-		struct rpc_cred *cred)
-{
-	long timeout = 0;
-	int err;
-	do {
-		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
-		switch (err) {
-			case 0:
-				return err;
-			case -NFS4ERR_RESOURCE:
-				/* The IBM lawyers misread another document! */
-			case -NFS4ERR_DELAY:
-				err = nfs4_delay(clp->cl_rpcclient, &timeout);
-		}
-	} while (err == 0);
-	return err;
-}
-
 struct nfs4_delegreturndata {
 	struct nfs4_delegreturnargs args;
 	struct nfs4_delegreturnres res;
@@ -4786,7 +4795,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 				init_utsname()->domainname,
 				clp->cl_rpcclient->cl_auth->au_flavor);
 
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	if (!status)
 		status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 	dprintk("<-- %s status= %d\n", __func__, status);
@@ -4837,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
 		dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
 		rpc_delay(task, NFS4_POLL_RETRY_MIN);
 		task->tk_status = 0;
+		/* fall through */
+	case -NFS4ERR_RETRY_UNCACHED_REP:
 		nfs_restart_rpc(task, data->clp);
 		return;
 	}
@@ -4869,7 +4880,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_get_lease_time_ops,
-		.callback_data = &data
+		.callback_data = &data,
+		.flags = RPC_TASK_TIMEOUT,
 	};
 	int status;
 
@@ -5171,7 +5183,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
 	nfs4_init_channel_attrs(&args);
 	args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
 
-	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 
 	if (!status)
 		/* Verify the session's negotiated channel_attrs values */
@@ -5194,20 +5206,10 @@ int nfs4_proc_create_session(struct nfs_client *clp)
 	int status;
 	unsigned *ptr;
 	struct nfs4_session *session = clp->cl_session;
-	long timeout = 0;
-	int err;
 
 	dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
 
-	do {
-		status = _nfs4_proc_create_session(clp);
-		if (status == -NFS4ERR_DELAY) {
-			err = nfs4_delay(clp->cl_rpcclient, &timeout);
-			if (err)
-				status = err;
-		}
-	} while (status == -NFS4ERR_DELAY);
-
+	status = _nfs4_proc_create_session(clp);
 	if (status)
 		goto out;
 
@@ -5248,7 +5250,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
 	msg.rpc_argp = session;
 	msg.rpc_resp = NULL;
 	msg.rpc_cred = NULL;
-	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 
 	if (status)
 		printk(KERN_WARNING
@@ -5481,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
 		break;
 	case -NFS4ERR_DELAY:
 		rpc_delay(task, NFS4_POLL_RETRY_MAX);
+		/* fall through */
+	case -NFS4ERR_RETRY_UNCACHED_REP:
 		return -EAGAIN;
 	default:
 		nfs4_schedule_lease_recovery(clp);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a6804f704d9d..036f5adc9e1f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list);
 
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
-	struct nfs4_setclientid_res clid;
+	struct nfs4_setclientid_res clid = {
+		.clientid = clp->cl_clientid,
+		.confirm = clp->cl_confirm,
+	};
 	unsigned short port;
 	int status;
 
+	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
+		goto do_confirm;
 	port = nfs_callback_tcpport;
 	if (clp->cl_addr.ss_family == AF_INET6)
 		port = nfs_callback_tcpport6;
@@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
 	if (status != 0)
 		goto out;
+	clp->cl_clientid = clid.clientid;
+	clp->cl_confirm = clid.confirm;
+	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+do_confirm:
 	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
 	if (status != 0)
 		goto out;
-	clp->cl_clientid = clid.clientid;
+	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 	nfs4_schedule_state_renewal(clp);
 out:
 	return status;
@@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status;
 
+	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
+		goto do_confirm;
 	nfs4_begin_drain_session(clp);
 	status = nfs4_proc_exchange_id(clp, cred);
 	if (status != 0)
 		goto out;
+	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+do_confirm:
 	status = nfs4_proc_create_session(clp);
 	if (status != 0)
 		goto out;
+	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 	nfs41_setup_state_renewal(clp);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
@@ -1584,20 +1598,23 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
  */
 static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
 {
-	if (nfs4_has_session(clp)) {
-		switch (status) {
-		case -NFS4ERR_DELAY:
-		case -NFS4ERR_CLID_INUSE:
-		case -EAGAIN:
-			break;
+	switch (status) {
+	case -NFS4ERR_CLID_INUSE:
+	case -NFS4ERR_STALE_CLIENTID:
+		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+		break;
+	case -NFS4ERR_DELAY:
+	case -ETIMEDOUT:
+	case -EAGAIN:
+		ssleep(1);
+		break;
 
-		case -EKEYEXPIRED:
-			nfs4_warn_keyexpired(clp->cl_hostname);
-		case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
-					 * in nfs4_exchange_id */
-		default:
-			return;
-		}
+	case -EKEYEXPIRED:
+		nfs4_warn_keyexpired(clp->cl_hostname);
+	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+				 * in nfs4_exchange_id */
+	default:
+		return;
 	}
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 }
@@ -1607,7 +1624,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 	int status = 0;
 
 	/* Ensure exclusive access to NFSv4 state */
-	for(;;) {
+	do {
 		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
 			/* We're going to have to re-establish a clientid */
 			status = nfs4_reclaim_lease(clp);
@@ -1691,7 +1708,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			break;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			break;
-	}
+	} while (atomic_read(&clp->cl_count) > 1);
 	return;
 out_error:
 	printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dddfb5795d7b..c3ccd2c46834 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
 
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
 {
-	uint32_t attrs[2] = {0, 0};
+	uint32_t attrs[2] = {
+		FATTR4_WORD0_RDATTR_ERROR,
+		FATTR4_WORD1_MOUNTED_ON_FILEID,
+	};
 	uint32_t dircount = readdir->count >> 1;
 	__be32 *p;
 
 	if (readdir->plus) {
 		attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
-			FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
+			FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
 		attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
 			FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
 			FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
 			FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
 		dircount >>= 1;
 	}
-	attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
-	attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
-	/* Switch to mounted_on_fileid if the server supports it */
-	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
-		attrs[0] &= ~FATTR4_WORD0_FILEID;
-	else
-		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	/* Use mounted_on_fileid only if the server supports it */
+	if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
+		attrs[0] |= FATTR4_WORD0_FILEID;
 
 	p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
 	*p++ = cpu_to_be32(OP_READDIR);
@@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
 			goto out_overflow;
 		xdr_decode_hyper(p, fileid);
 		bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
-		ret = NFS_ATTR_FATTR_FILEID;
+		ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
 	}
 	dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
 	return ret;
@@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 {
 	int status;
 	umode_t fmode = 0;
-	uint64_t fileid;
 	uint32_t type;
 
 	status = decode_attr_type(xdr, bitmap, &type);
@@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid);
+	status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid);
 	if (status < 0)
 		goto xdr_error;
-	if (status != 0 && !(fattr->valid & status)) {
-		fattr->fileid = fileid;
-		fattr->valid |= status;
-	}
+	fattr->valid |= status;
 
 xdr_error:
 	dprintk("%s: xdr returned %d\n", __func__, -status);
@@ -4838,17 +4833,21 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
 	struct nfs4_secinfo_flavor *sec_flavor;
 	int status;
 	__be32 *p;
-	int i;
+	int i, num_flavors;
 
 	status = decode_op_hdr(xdr, OP_SECINFO);
+	if (status)
+		goto out;
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
 		goto out_overflow;
-	res->flavors->num_flavors = be32_to_cpup(p);
 
-	for (i = 0; i < res->flavors->num_flavors; i++) {
+	res->flavors->num_flavors = 0;
+	num_flavors = be32_to_cpup(p);
+
+	for (i = 0; i < num_flavors; i++) {
 		sec_flavor = &res->flavors->flavors[i];
-		if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE)
+		if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE)
 			break;
 
 		p = xdr_inline_decode(xdr, 4);
@@ -4857,13 +4856,15 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
 		sec_flavor->flavor = be32_to_cpup(p);
 
 		if (sec_flavor->flavor == RPC_AUTH_GSS) {
-			if (decode_secinfo_gss(xdr, sec_flavor))
-				break;
+			status = decode_secinfo_gss(xdr, sec_flavor);
+			if (status)
+				goto out;
 		}
+		res->flavors->num_flavors++;
 	}
 
-	return 0;
-
+out:
+	return status;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
@@ -6408,7 +6409,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
 					entry->server, 1) < 0)
 		goto out_overflow;
-	if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
+	if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+		entry->ino = entry->fattr->mounted_on_fileid;
+	else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
 		entry->ino = entry->fattr->fileid;
 
 	entry->d_type = DT_UNKNOWN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9ab97269ce6..f57f5281a520 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 				plh_layouts);
 		dprintk("%s freeing layout for inode %lu\n", __func__,
 			lo->plh_inode->i_ino);
+		list_del_init(&lo->plh_layouts);
 		pnfs_destroy_layout(NFS_I(lo->plh_inode));
 	}
 }
@@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 static struct pnfs_layout_segment *
 send_layoutget(struct pnfs_layout_hdr *lo,
 	   struct nfs_open_context *ctx,
-	   u32 iomode)
+	   u32 iomode,
+	   gfp_t gfp_flags)
 {
 	struct inode *ino = lo->plh_inode;
 	struct nfs_server *server = NFS_SERVER(ino);
@@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	dprintk("--> %s\n", __func__);
 
 	BUG_ON(ctx == NULL);
-	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+	lgp = kzalloc(sizeof(*lgp), gfp_flags);
 	if (lgp == NULL)
 		return NULL;
 
@@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
 	max_pages = max_resp_sz >> PAGE_SHIFT;
 
-	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+	pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
 	if (!pages)
 		goto out_err_free;
 
 	for (i = 0; i < max_pages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL);
+		pages[i] = alloc_page(gfp_flags);
 		if (!pages[i])
 			goto out_err_free;
 	}
@@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	lgp->args.layout.pages = pages;
 	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
 	lgp->lsegpp = &lseg;
+	lgp->gfp_flags = gfp_flags;
 
 	/* Synchronously retrieve layout information from server and
 	 * store in lseg.
@@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 }
 
 static struct pnfs_layout_hdr *
-alloc_init_layout_hdr(struct inode *ino)
+alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
 {
 	struct pnfs_layout_hdr *lo;
 
-	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
+	lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
 	if (!lo)
 		return NULL;
 	atomic_set(&lo->plh_refcount, 1);
@@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino)
 }
 
 static struct pnfs_layout_hdr *
-pnfs_find_alloc_layout(struct inode *ino)
+pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct pnfs_layout_hdr *new = NULL;
@@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino)
 			return nfsi->layout;
 	}
 	spin_unlock(&ino->i_lock);
-	new = alloc_init_layout_hdr(ino);
+	new = alloc_init_layout_hdr(ino, gfp_flags);
 	spin_lock(&ino->i_lock);
 
 	if (likely(nfsi->layout == NULL))	/* Won the race? */
@@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino,
 		   struct nfs_open_context *ctx,
-		   enum pnfs_iomode iomode)
+		   enum pnfs_iomode iomode,
+		   gfp_t gfp_flags)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
@@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino,
 	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
 		return NULL;
 	spin_lock(&ino->i_lock);
-	lo = pnfs_find_alloc_layout(ino);
+	lo = pnfs_find_alloc_layout(ino, gfp_flags);
 	if (lo == NULL) {
 		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
 		goto out_unlock;
@@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino,
 		spin_unlock(&clp->cl_lock);
 	}
 
-	lseg = send_layoutget(lo, ctx, iomode);
+	lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
 	if (!lseg && first) {
 		spin_lock(&clp->cl_lock);
 		list_del_init(&lo->plh_layouts);
@@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 		goto out;
 	}
 	/* Inject layout blob into I/O device driver */
-	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
 	if (!lseg || IS_ERR(lseg)) {
 		if (!lseg)
 			status = -ENOMEM;
@@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
 		/* This is first coelesce call for a series of nfs_pages */
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   prev->wb_context,
-						   IOMODE_READ);
+						   IOMODE_READ,
+						   GFP_KERNEL);
 	}
 	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
 }
@@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
 		/* This is first coelesce call for a series of nfs_pages */
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   prev->wb_context,
-						   IOMODE_RW);
+						   IOMODE_RW,
+						   GFP_NOFS);
 	}
 	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
 }
@@ -1004,6 +1010,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
 	struct nfs_inode *nfsi = NFS_I(wdata->inode);
 	loff_t end_pos = wdata->args.offset + wdata->res.count;
+	bool mark_as_dirty = false;
 
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
@@ -1011,13 +1018,18 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 		get_lseg(wdata->lseg);
 		wdata->lseg->pls_lc_cred =
 			get_rpccred(wdata->args.context->state->owner->so_cred);
-		mark_inode_dirty_sync(wdata->inode);
+		mark_as_dirty = true;
 		dprintk("%s: Set layoutcommit for inode %lu ",
 			__func__, wdata->inode->i_ino);
 	}
 	if (end_pos > wdata->lseg->pls_end_pos)
 		wdata->lseg->pls_end_pos = end_pos;
 	spin_unlock(&nfsi->vfs_inode.i_lock);
+
+	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
+	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
+	if (mark_as_dirty)
+		mark_inode_dirty_sync(wdata->inode);
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bc4827202e7a..0c015bad9e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type {
 	const u32 id;
 	const char *name;
 	struct module *owner;
-	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
+	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
 	void (*free_lseg) (struct pnfs_layout_segment *lseg);
 
 	/* test for nfs page cache coalescing */
@@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   enum pnfs_iomode access_type);
+		   enum pnfs_iomode access_type, gfp_t gfp_flags);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
 
 static inline struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-		   enum pnfs_iomode access_type)
+		   enum pnfs_iomode access_type, gfp_t gfp_flags)
 {
 	return NULL;
 }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cded2b12a05..2bcf0dc306a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
 	atomic_set(&req->wb_complete, requests);
 
 	BUG_ON(desc->pg_lseg != NULL);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
 	ClearPageError(page);
 	offset = 0;
 	nbytes = desc->pg_count;
@@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
 	}
 	req = nfs_list_entry(data->pages.next);
 	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
 
 	ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
 				0, lseg);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2b8e9a5e366a..e288f06d3fa7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1004,6 +1004,7 @@ static int nfs_parse_security_flavors(char *value,
 		return 0;
 	}
 
+	mnt->flags |= NFS_MOUNT_SECFLAVOUR;
 	mnt->auth_flavor_len = 1;
 	return 1;
 }
@@ -1976,6 +1977,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	if (error < 0)
 		goto out;
 
+	/*
+	 * noac is a special case. It implies -o sync, but that's not
+	 * necessarily reflected in the mtab options. do_remount_sb
+	 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
+	 * remount options, so we have to explicitly reset it.
+	 */
+	if (data->flags & NFS_MOUNT_NOAC)
+		*flags |= MS_SYNCHRONOUS;
+
 	/* compare new mount options with old ones */
 	error = nfs_compare_remount_data(nfss, data);
 out:
@@ -2235,8 +2245,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
 	if (!s->s_root) {
 		/* initial superblock/root creation */
 		nfs_fill_super(s, data);
-		nfs_fscache_get_super_cookie(
-			s, data ? data->fscache_uniq : NULL, NULL);
+		nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
 	}
 
 	mntroot = nfs_get_root(s, mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af0c6279a4a7..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -542,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
+	spin_lock(&inode->i_lock);
 	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
 	if (ret > 0)
 		nfsi->ncommit -= ret;
+	spin_unlock(&inode->i_lock);
+
 	if (nfs_need_commit(NFS_I(inode)))
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+
 	return ret;
 }
 #else
@@ -676,7 +680,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	req = nfs_setup_write_request(ctx, page, offset, count);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-	nfs_mark_request_dirty(req);
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -936,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
 	atomic_set(&req->wb_complete, requests);
 
 	BUG_ON(desc->pg_lseg);
-	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+	lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
 	ClearPageError(page);
 	offset = 0;
 	nbytes = desc->pg_count;
@@ -1010,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
 	}
 	req = nfs_list_entry(data->pages.next);
 	if ((!lseg) && list_is_singular(&data->pages))
-		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1414,8 +1417,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
                                 task->tk_pid, task->tk_status);
 
 	/* Call the NFS version-specific code */
-	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
-		return;
+	NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
 void nfs_commit_release_pages(struct nfs_write_data *data)
@@ -1483,9 +1485,7 @@ int nfs_commit_inode(struct inode *inode, int how)
 	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
 	if (res <= 0)
 		goto out_mark_dirty;
-	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
-	spin_unlock(&inode->i_lock);
 	if (res) {
 		int error;
 
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 0c6d81670137..7c831a2731fa 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 	exp_readlock();
 	nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
 	fh_put(&fh);
-	rqstp->rq_client = NULL;
 	exp_readunlock();
  	/* We return nlm error codes as nlm doesn't know
 	 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4b36ec3eb8ea..4cf04e11c66c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -258,6 +258,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 	if (atomic_dec_and_test(&fp->fi_delegees)) {
 		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
 		fp->fi_lease = NULL;
+		fput(fp->fi_deleg_file);
 		fp->fi_deleg_file = NULL;
 	}
 }
@@ -397,9 +398,12 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp)
 
 static void free_generic_stateid(struct nfs4_stateid *stp)
 {
-	int oflag = nfs4_access_bmap_to_omode(stp);
+	int oflag;
 
-	nfs4_file_put_access(stp->st_file, oflag);
+	if (stp->st_access_bmap) {
+		oflag = nfs4_access_bmap_to_omode(stp);
+		nfs4_file_put_access(stp->st_file, oflag);
+	}
 	put_nfs4_file(stp->st_file);
 	kmem_cache_free(stateid_slab, stp);
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e1cebde90df..129f3c9f62d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 	if (!(iap->ia_valid & ATTR_MODE))
 		iap->ia_mode = 0;
-	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
 	if (err)
 		goto out;
 
@@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (IS_ERR(dchild))
 		goto out_nfserr;
 
+	/* If file doesn't exist, check for permissions to create one */
+	if (!dchild->d_inode) {
+		err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+		if (err)
+			goto out;
+	}
+
 	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
 	if (err)
 		goto out;
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 0a0a66d98cce..f7684483785e 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
 	unsigned long group, group_offset;
 	int i, j, n, ret;
 
-	for (i = 0; i < nitems; i += n) {
+	for (i = 0; i < nitems; i = j) {
 		group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
 		ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
 		if (ret < 0)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 643720209a98..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
 
 /* We want to make sure that nobody is heartbeating on top of us --
  * this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
 {
-	int node_num, ret;
 	struct o2hb_disk_slot *slot;
 	struct o2hb_disk_heartbeat_block *hb_block;
+	char *errstr;
 
-	node_num = o2nm_this_node();
-
-	ret = 1;
-	slot = &reg->hr_slots[node_num];
+	slot = &reg->hr_slots[o2nm_this_node()];
 	/* Don't check on our 1st timestamp */
-	if (slot->ds_last_time) {
-		hb_block = slot->ds_raw_block;
+	if (!slot->ds_last_time)
+		return;
 
-		if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
-			ret = 0;
-	}
+	hb_block = slot->ds_raw_block;
+	if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
+	    le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
+	    hb_block->hb_node == slot->ds_node_num)
+		return;
 
-	return ret;
+#define ERRSTR1		"Another node is heartbeating on device"
+#define ERRSTR2		"Heartbeat generation mismatch on device"
+#define ERRSTR3		"Heartbeat sequence mismatch on device"
+
+	if (hb_block->hb_node != slot->ds_node_num)
+		errstr = ERRSTR1;
+	else if (le64_to_cpu(hb_block->hb_generation) !=
+		 slot->ds_last_generation)
+		errstr = ERRSTR2;
+	else
+		errstr = ERRSTR3;
+
+	mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
+	     "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
+	     slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
+	     (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+	     (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+	     (unsigned long long)le64_to_cpu(hb_block->hb_seq));
 }
 
 static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	/* With an up to date view of the slots, we can check that no
 	 * other node has been improperly configured to heartbeat in
 	 * our slot. */
-	if (!o2hb_check_last_timestamp(reg))
-		mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
-		     "in our slot!\n", reg->hr_dev_name);
+	o2hb_check_last_timestamp(reg);
 
 	/* fill in the proper info for our next heartbeat */
 	o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	}
 
 	i = -1;
-	while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
-
+	while((i = find_next_bit(configured_nodes,
+				 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
 		change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
 	}
 
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	struct file *filp = NULL;
 	struct inode *inode = NULL;
 	ssize_t ret = -EINVAL;
+	int live_threshold;
 
 	if (reg->hr_bdev)
 		goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	 * A node is considered live after it has beat LIVE_THRESHOLD
 	 * times.  We're not steady until we've given them a chance
 	 * _after_ our first read.
+	 * The default threshold is the bare minimum so as to limit the delay
+	 * during mounts. For global heartbeat, the threshold is doubled for
+	 * the first region.
 	 */
-	atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
+	live_threshold = O2HB_LIVE_THRESHOLD;
+	if (o2hb_global_heartbeat_active()) {
+		spin_lock(&o2hb_live_lock);
+		if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+			live_threshold <<= 1;
+		spin_unlock(&o2hb_live_lock);
+	}
+	atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
 
 	hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
 			      reg->hr_item.ci_name);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9fe5b8fd658f..8582e3f4f120 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		bytes = blocks_wanted << sb->s_blocksize_bits;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	struct ocfs2_inode_info *oi = OCFS2_I(dir);
-	struct ocfs2_alloc_context *data_ac;
+	struct ocfs2_alloc_context *data_ac = NULL;
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	struct buffer_head *dirdata_bh = NULL;
 	struct buffer_head *dx_root_bh = NULL;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7540a492eaba..3b179d6cbde0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
 	spin_unlock(&dlm->spinlock);
 
 	/* Support for global heartbeat and node info was added in 1.1 */
-	if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
+	if (dlm->dlm_locking_proto.pv_major > 1 ||
+	    dlm->dlm_locking_proto.pv_minor > 0) {
 		status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
 		if (status) {
 			mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index fede57ed005f..84d166328cf7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2574,6 +2574,9 @@ fail:
 		res->state &= ~DLM_LOCK_RES_MIGRATING;
 		wake = 1;
 		spin_unlock(&res->spinlock);
+		if (dlm_is_host_down(ret))
+			dlm_wait_for_node_death(dlm, target,
+						DLM_NODE_DEATH_WAIT_MAX);
 		goto leave;
 	}
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41565ae52856..89659d6dc206 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
 	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
 
 	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
+		/*
+		 * remove an entire extent record.
+		 */
 		*trunc_cpos = le32_to_cpu(rec->e_cpos);
 		/*
 		 * Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
 		*blkno = le64_to_cpu(rec->e_blkno);
 		*trunc_end = le32_to_cpu(rec->e_cpos);
 	} else if (range > trunc_start) {
+		/*
+		 * remove a partial extent record, which means we're
+		 * removing the last extent record.
+		 */
 		*trunc_cpos = trunc_start;
+		/*
+		 * skip hole if any.
+		 */
+		if (range < *trunc_end)
+			*trunc_end = range;
 		*trunc_len = *trunc_end - trunc_start;
 		coff = trunc_start - le32_to_cpu(rec->e_cpos);
 		*blkno = le64_to_cpu(rec->e_blkno) +
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b141a44605ca..295d56454e8b 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 {
 	struct ocfs2_journal *journal = osb->journal;
 
+	if (ocfs2_is_hard_readonly(osb))
+		return;
+
 	/* No need to queue up our truncate_log as regular cleanup will catch
 	 * that */
 	ocfs2_queue_recovery_completion(journal, osb->slot_num,
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index b68f87a83924..938387a10d5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1019,7 +1019,7 @@ struct ocfs2_xattr_entry {
 	__le16	xe_name_offset;  /* byte offset from the 1st entry in the
 				    local xattr storage(inode, xattr block or
 				    xattr bucket). */
-	__u8	xe_name_len;	 /* xattr name len, does't include prefix. */
+	__u8	xe_name_len;	 /* xattr name len, doesn't include prefix. */
 	__u8	xe_type;         /* the low 7 bits indicate the name prefix
 				  * type and the highest bit indicates whether
 				  * the EA is stored in the local storage. */
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index ac0ccb5026a2..19d6750d1d6c 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
 		goto fail;
 	}
 
+	/* Check that sizeof_partition_entry has the correct value */
+	if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
+		pr_debug("GUID Partitition Entry Size check failed.\n");
+		goto fail;
+	}
+
 	if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
 		goto fail;
 
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index b10e3540d5b7..ce4f62440425 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 
 	BUG_ON (!data || !frags);
 
+	if (size < 2 * VBLK_SIZE_HEAD) {
+		ldm_error("Value of size is to small.");
+		return false;
+	}
+
 	group = get_unaligned_be32(data + 0x08);
 	rec   = get_unaligned_be16(data + 0x0C);
 	num   = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
 		ldm_error ("A VBLK claims to have %d parts.", num);
 		return false;
 	}
+	if (rec >= num) {
+		ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
+		return false;
+	}
 
 	list_for_each (item, frags) {
 		f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
 
 	f->map |= (1 << rec);
 
-	if (num > 0) {
-		data += VBLK_SIZE_HEAD;
-		size -= VBLK_SIZE_HEAD;
-	}
+	data += VBLK_SIZE_HEAD;
+	size -= VBLK_SIZE_HEAD;
+
 	memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
 
 	return true;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd6628d3ba42..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+	unsigned int nr;
+	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 
+	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+		goto out_no_task;
+	nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+
+	reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	if (!reaper)
 		goto out_no_task;
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2e7addfd9803..318d8654989b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	int flags = vma->vm_flags;
 	unsigned long ino = 0;
 	unsigned long long pgoff = 0;
-	unsigned long start;
+	unsigned long start, end;
 	dev_t dev = 0;
 	int len;
 
@@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (vma->vm_flags & VM_GROWSDOWN)
-		if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
-			start += PAGE_SIZE;
+	if (stack_guard_page_start(vma, start))
+		start += PAGE_SIZE;
+	end = vma->vm_end;
+	if (stack_guard_page_end(vma, end))
+		end -= PAGE_SIZE;
 
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
 			start,
-			vma->vm_end,
+			end,
 			flags & VM_READ ? 'r' : '-',
 			flags & VM_WRITE ? 'w' : '-',
 			flags & VM_EXEC ? 'x' : '-',
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
 void pstore_get_records(void)
 {
 	struct pstore_info *psi = psinfo;
-	size_t			size;
+	ssize_t			size;
 	u64			id;
 	enum pstore_type_id	type;
 	struct timespec		time;
-	int			failed = 0;
+	int			failed = 0, rc;
 
 	if (!psi)
 		return;
 
 	mutex_lock(&psinfo->buf_mutex);
+	rc = psi->open(psi);
+	if (rc)
+		goto out;
+
 	while ((size = psi->read(&id, &type, &time)) > 0) {
-		if (pstore_mkfile(type, psi->name, id, psi->buf, size,
+		if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
 				  time, psi->erase))
 			failed++;
 	}
+	psi->close(psi);
+out:
 	mutex_unlock(&psinfo->buf_mutex);
 
 	if (failed)
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 		SetPageDirty(page);
 
 		unlock_page(page);
+		put_page(page);
 	}
 
 	return 0;
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
 	 * but s_maxbytes was an unsigned long long for many releases. Throw
 	 * this warning for a little while to try and catch filesystems that
-	 * violate this rule. This warning should be either removed or
-	 * converted to a BUG() in 2.6.34.
+	 * violate this rule.
 	 */
 	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
 		"negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
 
 #include "sysfs.h"
 
-/* used in crash dumps to help with debugging */
-static char last_sysfs_file[PATH_MAX];
-void sysfs_printk_last_file(void)
-{
-	printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
-}
-
 /*
  * There's one sysfs_buffer for each open file and one
  * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_buffer *buffer;
 	const struct sysfs_ops *ops;
 	int error = -EACCES;
-	char *p;
-
-	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
-	if (!IS_ERR(p))
-		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
 }
 
 /**
- * sysfs_update_group - given a directory kobject, create an attribute group
- * @kobj:	The kobject to create the group on
- * @grp:	The attribute group to create
+ * sysfs_update_group - given a directory kobject, update an attribute group
+ * @kobj:	The kobject to update the group on
+ * @grp:	The attribute group to update
  *
  * This function updates an attribute group.  Unlike
  * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
 #ifndef __UBIFS_DEBUG_H__
 #define __UBIFS_DEBUG_H__
 
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+				 struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+				  struct ubifs_znode *znode, void *priv);
+
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
 /**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
 void dbg_dump_lpt_lebs(const struct ubifs_info *c);
 
-/* Checking helper functions */
-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
-				 struct ubifs_zbranch *zbr, void *priv);
-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
-				  struct ubifs_znode *znode, void *priv);
 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
 		   dbg_znode_callback znode_cb, void *priv);
 
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 		    int add_pos);
-int dbg_check_lprops(struct ubifs_info *c);
 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 			int row, int col);
 int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
-#define ubifs_debugging_init(c)                0
-#define ubifs_debugging_exit(c)                ({})
-
-#define dbg_ntype(type)                        ""
-#define dbg_cstate(cmt_state)                  ""
-#define dbg_jhead(jhead)                       ""
-#define dbg_get_key_dump(c, key)               ({})
-#define dbg_dump_inode(c, inode)               ({})
-#define dbg_dump_node(c, node)                 ({})
-#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
-#define dbg_dump_budget_req(req)               ({})
-#define dbg_dump_lstats(lst)                   ({})
-#define dbg_dump_budg(c)                       ({})
-#define dbg_dump_lprop(c, lp)                  ({})
-#define dbg_dump_lprops(c)                     ({})
-#define dbg_dump_lpt_info(c)                   ({})
-#define dbg_dump_leb(c, lnum)                  ({})
-#define dbg_dump_znode(c, znode)               ({})
-#define dbg_dump_heap(c, heap, cat)            ({})
-#define dbg_dump_pnode(c, pnode, parent, iip)  ({})
-#define dbg_dump_tnc(c)                        ({})
-#define dbg_dump_index(c)                      ({})
-#define dbg_dump_lpt_lebs(c)                   ({})
-
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
-#define dbg_old_index_check_init(c, zroot)         0
-#define dbg_save_space_info(c)                     ({})
-#define dbg_check_space_info(c)                    0
-#define dbg_check_old_index(c, zroot)              0
-#define dbg_check_cats(c)                          0
-#define dbg_check_ltab(c)                          0
-#define dbg_chk_lpt_free_spc(c)                    0
-#define dbg_chk_lpt_sz(c, action, len)             0
-#define dbg_check_synced_i_size(inode)             0
-#define dbg_check_dir_size(c, dir)                 0
-#define dbg_check_tnc(c, x)                        0
-#define dbg_check_idx_size(c, idx_size)            0
-#define dbg_check_filesystem(c)                    0
-#define dbg_check_heap(c, heap, cat, add_pos)      ({})
-#define dbg_check_lprops(c)                        0
-#define dbg_check_lpt_nodes(c, cnode, row, col)    0
-#define dbg_check_inode_size(c, inode, size)       0
-#define dbg_check_data_nodes_order(c, head)        0
-#define dbg_check_nondata_nodes_order(c, head)     0
-#define dbg_force_in_the_gaps_enabled              0
-#define dbg_force_in_the_gaps()                    0
-#define dbg_failure_mode                           0
-
-#define dbg_debugfs_init()                         0
-#define dbg_debugfs_exit()
-#define dbg_debugfs_init_fs(c)                     0
-#define dbg_debugfs_exit_fs(c)                     0
+static inline int ubifs_debugging_init(struct ubifs_info *c)      { return 0; }
+static inline void ubifs_debugging_exit(struct ubifs_info *c)     { return; }
+static inline const char *dbg_ntype(int type)                     { return ""; }
+static inline const char *dbg_cstate(int cmt_state)               { return ""; }
+static inline const char *dbg_jhead(int jhead)                    { return ""; }
+static inline const char *
+dbg_get_key_dump(const struct ubifs_info *c,
+		 const union ubifs_key *key)                      { return ""; }
+static inline void dbg_dump_inode(const struct ubifs_info *c,
+				  const struct inode *inode)      { return; }
+static inline void dbg_dump_node(const struct ubifs_info *c,
+				 const void *node)                { return; }
+static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
+				     void *node, int lnum,
+				     int offs)                    { return; }
+static inline void
+dbg_dump_budget_req(const struct ubifs_budget_req *req)           { return; }
+static inline void
+dbg_dump_lstats(const struct ubifs_lp_stats *lst)                 { return; }
+static inline void dbg_dump_budg(struct ubifs_info *c)            { return; }
+static inline void dbg_dump_lprop(const struct ubifs_info *c,
+				  const struct ubifs_lprops *lp)  { return; }
+static inline void dbg_dump_lprops(struct ubifs_info *c)          { return; }
+static inline void dbg_dump_lpt_info(struct ubifs_info *c)        { return; }
+static inline void dbg_dump_leb(const struct ubifs_info *c,
+				int lnum)                         { return; }
+static inline void
+dbg_dump_znode(const struct ubifs_info *c,
+	       const struct ubifs_znode *znode)                   { return; }
+static inline void dbg_dump_heap(struct ubifs_info *c,
+				 struct ubifs_lpt_heap *heap,
+				 int cat)                         { return; }
+static inline void dbg_dump_pnode(struct ubifs_info *c,
+				  struct ubifs_pnode *pnode,
+				  struct ubifs_nnode *parent,
+				  int iip)                        { return; }
+static inline void dbg_dump_tnc(struct ubifs_info *c)             { return; }
+static inline void dbg_dump_index(struct ubifs_info *c)           { return; }
+static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c)  { return; }
+
+static inline int dbg_walk_index(struct ubifs_info *c,
+				 dbg_leaf_callback leaf_cb,
+				 dbg_znode_callback znode_cb,
+				 void *priv)                      { return 0; }
+static inline void dbg_save_space_info(struct ubifs_info *c)      { return; }
+static inline int dbg_check_space_info(struct ubifs_info *c)      { return 0; }
+static inline int dbg_check_lprops(struct ubifs_info *c)          { return 0; }
+static inline int
+dbg_old_index_check_init(struct ubifs_info *c,
+			 struct ubifs_zbranch *zroot)             { return 0; }
+static inline int
+dbg_check_old_index(struct ubifs_info *c,
+		    struct ubifs_zbranch *zroot)                  { return 0; }
+static inline int dbg_check_cats(struct ubifs_info *c)            { return 0; }
+static inline int dbg_check_ltab(struct ubifs_info *c)            { return 0; }
+static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c)      { return 0; }
+static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
+				 int action, int len)             { return 0; }
+static inline int dbg_check_synced_i_size(struct inode *inode)    { return 0; }
+static inline int dbg_check_dir_size(struct ubifs_info *c,
+				     const struct inode *dir)     { return 0; }
+static inline int dbg_check_tnc(struct ubifs_info *c, int extra)  { return 0; }
+static inline int dbg_check_idx_size(struct ubifs_info *c,
+				     long long idx_size)          { return 0; }
+static inline int dbg_check_filesystem(struct ubifs_info *c)      { return 0; }
+static inline void dbg_check_heap(struct ubifs_info *c,
+				  struct ubifs_lpt_heap *heap,
+				  int cat, int add_pos)           { return; }
+static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
+	struct ubifs_cnode *cnode, int row, int col)              { return 0; }
+static inline int dbg_check_inode_size(struct ubifs_info *c,
+				       const struct inode *inode,
+				       loff_t size)               { return 0; }
+static inline int
+dbg_check_data_nodes_order(struct ubifs_info *c,
+			   struct list_head *head)                { return 0; }
+static inline int
+dbg_check_nondata_nodes_order(struct ubifs_info *c,
+			      struct list_head *head)             { return 0; }
+
+static inline int dbg_force_in_the_gaps(void)                     { return 0; }
+#define dbg_force_in_the_gaps_enabled 0
+#define dbg_failure_mode              0
+
+static inline int dbg_debugfs_init(void)                          { return 0; }
+static inline void dbg_debugfs_exit(void)                         { return; }
+static inline int dbg_debugfs_init_fs(struct ubifs_info *c)       { return 0; }
+static inline int dbg_debugfs_exit_fs(struct ubifs_info *c)       { return 0; }
 
 #endif /* !CONFIG_UBIFS_FS_DEBUG */
 #endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 28be1e6a65e8..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
 
 	dbg_gen("syncing inode %lu", inode->i_ino);
 
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return 0;
+
 	/*
 	 * VFS has already synchronized dirty pages for this inode. Synchronize
 	 * the inode unless this is a 'datasync()' call.
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..40fa780ebea7 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -175,26 +175,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
 }
 
 /**
- * ubifs_create_buds_lists - create journal head buds lists for remount rw.
- * @c: UBIFS file-system description object
- */
-void ubifs_create_buds_lists(struct ubifs_info *c)
-{
-	struct rb_node *p;
-
-	spin_lock(&c->buds_lock);
-	p = rb_first(&c->buds);
-	while (p) {
-		struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
-		struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
-
-		list_add_tail(&bud->list, &jhead->buds_list);
-		p = rb_next(p);
-	}
-	spin_unlock(&c->buds_lock);
-}
-
-/**
  * ubifs_add_bud_to_log - add a new bud to the log.
  * @c: UBIFS file-system description object
  * @jhead: journal head the bud belongs to
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 936f2cbfe6b6..3dbad6fbd1eb 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -317,6 +317,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
 			goto out_free;
 		}
 		memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
+
+		/*
+		 * We had to recover the master node, which means there was an
+		 * unclean reboot. However, it is possible that the master node
+		 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
+		 * E.g., consider the following chain of events:
+		 *
+		 * 1. UBIFS was cleanly unmounted, so the master node is clean
+		 * 2. UBIFS is being mounted R/W and starts changing the master
+		 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
+		 *    so this LEB ends up with some amount of garbage at the
+		 *    end.
+		 * 3. UBIFS is being mounted R/O. We reach this place and
+		 *    recover the master node from the second LEB
+		 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
+		 *    because we are being mounted R/O. We have to defer the
+		 *    operation.
+		 * 4. However, this master node (@c->mst_node) is marked as
+		 *    clean (since the step 1). And if we just return, the
+		 *    mount code will be confused and won't recover the master
+		 *    node when it is re-mounted R/W later.
+		 *
+		 *    Thus, we force the recovery by marking the master node as
+		 *    dirty.
+		 */
+		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 	} else {
 		/* Write the recovered master node */
 		c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..d3d6d365bfc1 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -59,6 +59,7 @@ enum {
  * @new_size: truncation new size
  * @free: amount of free space in a bud
  * @dirty: amount of dirty space in a bud from padding and deletion nodes
+ * @jhead: journal head number of the bud
  *
  * UBIFS journal replay must compare node sequence numbers, which means it must
  * build a tree of node information to insert into the TNC.
@@ -80,6 +81,7 @@ struct replay_entry {
 		struct {
 			int free;
 			int dirty;
+			int jhead;
 		};
 	};
 };
@@ -159,6 +161,11 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
 		err = PTR_ERR(lp);
 		goto out;
 	}
+
+	/* Make sure the journal head points to the latest bud */
+	err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
+				     c->leb_size - r->free, UBI_SHORTTERM);
+
 out:
 	ubifs_release_lprops(c);
 	return err;
@@ -627,10 +634,6 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 	ubifs_assert(sleb->endpt - offs >= used);
 	ubifs_assert(sleb->endpt % c->min_io_size == 0);
 
-	if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
-		err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
-					     sleb->endpt, UBI_SHORTTERM);
-
 	*dirty = sleb->endpt - offs - used;
 	*free = c->leb_size - sleb->endpt;
 
@@ -653,12 +656,14 @@ out_dump:
  * @sqnum: sequence number
  * @free: amount of free space in bud
  * @dirty: amount of dirty space from padding and deletion nodes
+ * @jhead: journal head number for the bud
  *
  * This function inserts a reference node to the replay tree and returns zero
  * in case of success or a negative error code in case of failure.
  */
 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
-			   unsigned long long sqnum, int free, int dirty)
+			   unsigned long long sqnum, int free, int dirty,
+			   int jhead)
 {
 	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
 	struct replay_entry *r;
@@ -688,6 +693,7 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
 	r->flags = REPLAY_REF;
 	r->free = free;
 	r->dirty = dirty;
+	r->jhead = jhead;
 
 	rb_link_node(&r->rb, parent, p);
 	rb_insert_color(&r->rb, &c->replay_tree);
@@ -712,7 +718,7 @@ static int replay_buds(struct ubifs_info *c)
 		if (err)
 			return err;
 		err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
-				      free, dirty);
+				      free, dirty, b->bud->jhead);
 		if (err)
 			return err;
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c75f6133206c..04ad07f4fcc3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
 		goto out_free;
 	}
 
+	err = alloc_wbufs(c);
+	if (err)
+		goto out_cbuf;
+
 	sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
 	if (!c->ro_mount) {
-		err = alloc_wbufs(c);
-		if (err)
-			goto out_cbuf;
-
 		/* Create background thread */
 		c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
 		if (IS_ERR(c->bgt)) {
@@ -1631,12 +1631,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	if (err)
 		goto out;
 
-	err = alloc_wbufs(c);
-	if (err)
-		goto out;
-
-	ubifs_create_buds_lists(c);
-
 	/* Create background thread */
 	c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
 	if (IS_ERR(c->bgt)) {
@@ -1671,14 +1665,25 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	if (err)
 		goto out;
 
+	dbg_gen("re-mounted read-write");
+	c->remounting_rw = 0;
+
 	if (c->need_recovery) {
 		c->need_recovery = 0;
 		ubifs_msg("deferred recovery completed");
+	} else {
+		/*
+		 * Do not run the debugging space check if we were doing
+		 * recovery, because when we saved the information we had the
+		 * file-system in a state where the TNC and lprops have been
+		 * modified in memory, but all the I/O operations (including a
+		 * commit) were deferred. So the file-system was in
+		 * "non-committed" state. Now the file-system is in committed
+		 * state, and of course the amount of free space will change
+		 * because, for example, the old index size was imprecise.
+		 */
+		err = dbg_check_space_info(c);
 	}
-
-	dbg_gen("re-mounted read-write");
-	c->remounting_rw = 0;
-	err = dbg_check_space_info(c);
 	mutex_unlock(&c->umount_mutex);
 	return err;
 
@@ -1733,7 +1738,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 	if (err)
 		ubifs_ro_mode(c, err);
 
-	free_wbufs(c);
 	vfree(c->orph_buf);
 	c->orph_buf = NULL;
 	kfree(c->write_reserve_buf);
@@ -1761,10 +1765,12 @@ static void ubifs_put_super(struct super_block *sb)
 	 * of the media. For example, there will be dirty inodes if we failed
 	 * to write them back because of I/O errors.
 	 */
-	ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
-	ubifs_assert(c->budg_idx_growth == 0);
-	ubifs_assert(c->budg_dd_growth == 0);
-	ubifs_assert(c->budg_data_growth == 0);
+	if (!c->ro_error) {
+		ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
+		ubifs_assert(c->budg_idx_growth == 0);
+		ubifs_assert(c->budg_dd_growth == 0);
+		ubifs_assert(c->budg_data_growth == 0);
+	}
 
 	/*
 	 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
diff --git a/fs/xattr.c b/fs/xattr.c
index a19acdb81cd1..f1ef94974dea 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -666,7 +666,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
 	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(dentry, name, value, size, 0, handler->flags);
+	return handler->set(dentry, name, value, size, flags, handler->flags);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5ea402023ebd..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory(
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
@@ -333,7 +332,6 @@ use_alloc_page:
 		return error;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_SHIFT;
 	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
@@ -657,8 +655,6 @@ xfs_buf_readahead(
 	xfs_off_t		ioff,
 	size_t			isize)
 {
-	struct backing_dev_info *bdi;
-
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
@@ -919,8 +915,6 @@ xfs_buf_lock(
 
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
 
@@ -1309,8 +1303,6 @@ xfs_buf_iowait(
 {
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1747,8 +1739,8 @@ xfsbufd(
 	do {
 		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		int	count = 0;
 		struct list_head tmp;
+		struct blk_plug plug;
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1764,16 +1756,15 @@ xfsbufd(
 
 		xfs_buf_delwri_split(target, &tmp, age);
 		list_sort(NULL, &tmp, xfs_buf_cmp);
+
+		blk_start_plug(&plug);
 		while (!list_empty(&tmp)) {
 			struct xfs_buf *bp;
 			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_bdstrat_cb(bp);
-			count++;
 		}
-		if (count)
-			blk_flush_plug(current);
-
+		blk_finish_plug(&plug);
 	} while (!kthread_should_stop());
 
 	return 0;
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg(
 	int		pincount = 0;
 	LIST_HEAD(tmp_list);
 	LIST_HEAD(wait_list);
+	struct blk_plug plug;
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg(
 	 * we do that after issuing all the IO.
 	 */
 	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+	blk_start_plug(&plug);
 	while (!list_empty(&tmp_list)) {
 		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg(
 		}
 		xfs_bdstrat_cb(bp);
 	}
+	blk_finish_plug(&plug);
 
 	if (wait) {
-		/* Expedite and wait for IO to complete. */
-		blk_flush_plug(current);
+		/* Wait for IO to complete. */
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 508e06fd7d1e..9f76cceb678d 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -28,53 +28,49 @@
 /*
  * XFS logging functions
  */
-static int
+static void
 __xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	struct va_format	*vaf)
 {
-	if (mp && mp->m_fsname)
-		return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-	return printk("%sXFS: %pV\n", level, vaf);
+	if (mp && mp->m_fsname) {
+		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+		return;
+	}
+	printk("%sXFS: %pV\n", level, vaf);
 }
 
-int xfs_printk(
+void xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	const char		*fmt, ...)
 {
 	struct va_format	vaf;
 	va_list			args;
-	int			 r;
 
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(level, mp, &vaf);
+	__xfs_printk(level, mp, &vaf);
 	va_end(args);
-
-	return r;
 }
 
 #define define_xfs_printk_level(func, kern_level)		\
-int func(const struct xfs_mount *mp, const char *fmt, ...)	\
+void func(const struct xfs_mount *mp, const char *fmt, ...)	\
 {								\
 	struct va_format	vaf;				\
 	va_list			args;				\
-	int			r;				\
 								\
 	va_start(args, fmt);					\
 								\
 	vaf.fmt = fmt;						\
 	vaf.va = &args;						\
 								\
-	r = __xfs_printk(kern_level, mp, &vaf);			\
+	__xfs_printk(kern_level, mp, &vaf);			\
 	va_end(args);						\
-								\
-	return r;						\
 }								\
 
 define_xfs_printk_level(xfs_emerg, KERN_EMERG);
@@ -88,7 +84,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO);
 define_xfs_printk_level(xfs_debug, KERN_DEBUG);
 #endif
 
-int
+void
 xfs_alert_tag(
 	const struct xfs_mount	*mp,
 	int			panic_tag,
@@ -97,7 +93,6 @@ xfs_alert_tag(
 	struct va_format	vaf;
 	va_list			args;
 	int			do_panic = 0;
-	int			r;
 
 	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
 		xfs_printk(KERN_ALERT, mp,
@@ -110,12 +105,10 @@ xfs_alert_tag(
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(KERN_ALERT, mp, &vaf);
+	__xfs_printk(KERN_ALERT, mp, &vaf);
 	va_end(args);
 
 	BUG_ON(do_panic);
-
-	return r;
 }
 
 void
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index e77ffa16745b..f1b3fc1b6c4e 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,32 +3,34 @@
 
 struct xfs_mount;
 
-extern int xfs_printk(const char *level, const struct xfs_mount *mp,
+extern void xfs_printk(const char *level, const struct xfs_mount *mp,
                       const char *fmt, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
 			 const char *fmt, ...)
         __attribute__ ((format (printf, 3, 4)));
-extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
-extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
 
 #ifdef DEBUG
-extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
         __attribute__ ((format (printf, 2, 3)));
 #else
-#define xfs_debug(mp, fmt, ...)	(0)
+static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
 #endif
 
 extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -816,75 +816,6 @@ xfs_setup_devices(
 	return 0;
 }
 
-/*
- * XFS AIL push thread support
- */
-void
-xfsaild_wakeup(
-	struct xfs_ail		*ailp,
-	xfs_lsn_t		threshold_lsn)
-{
-	/* only ever move the target forwards */
-	if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
-		ailp->xa_target = threshold_lsn;
-		wake_up_process(ailp->xa_task);
-	}
-}
-
-STATIC int
-xfsaild(
-	void	*data)
-{
-	struct xfs_ail	*ailp = data;
-	xfs_lsn_t	last_pushed_lsn = 0;
-	long		tout = 0; /* milliseconds */
-
-	while (!kthread_should_stop()) {
-		/*
-		 * for short sleeps indicating congestion, don't allow us to
-		 * get woken early. Otherwise all we do is bang on the AIL lock
-		 * without making progress.
-		 */
-		if (tout && tout <= 20)
-			__set_current_state(TASK_KILLABLE);
-		else
-			__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(tout ?
-				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
-
-		/* swsusp */
-		try_to_freeze();
-
-		ASSERT(ailp->xa_mount->m_log);
-		if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-			continue;
-
-		tout = xfsaild_push(ailp, &last_pushed_lsn);
-	}
-
-	return 0;
-}	/* xfsaild */
-
-int
-xfsaild_start(
-	struct xfs_ail	*ailp)
-{
-	ailp->xa_target = 0;
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-				    ailp->xa_mount->m_fsname);
-	if (IS_ERR(ailp->xa_task))
-		return -PTR_ERR(ailp->xa_task);
-	return 0;
-}
-
-void
-xfsaild_stop(
-	struct xfs_ail	*ailp)
-{
-	kthread_stop(ailp->xa_task);
-}
-
-
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs(
 		return -error;
 
 	if (laptop_mode) {
-		int	prev_sync_seq = mp->m_sync_seq;
-
 		/*
 		 * The disk must be active because we're syncing.
 		 * We schedule xfssyncd now (now that the disk is
 		 * active) instead of later (when it might not be).
 		 */
-		wake_up_process(mp->m_sync_task);
-		/*
-		 * We have to wait for the sync iteration to complete.
-		 * If we don't, the disk activity caused by the sync
-		 * will come after the sync is completed, and that
-		 * triggers another sync from laptop mode.
-		 */
-		wait_event(mp->m_wait_single_sync_task,
-				mp->m_sync_seq != prev_sync_seq);
+		flush_delayed_work_sync(&mp->m_sync_work);
 	}
 
 	return 0;
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_LIST_HEAD(&mp->m_sync_list);
-	spin_lock_init(&mp->m_sync_lock);
-	init_waitqueue_head(&mp->m_wait_single_sync_task);
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void)
 }
 
 STATIC int __init
+xfs_init_workqueues(void)
+{
+	/*
+	 * max_active is set to 8 to give enough concurrency to allow
+	 * multiple work operations on each CPU to run. This allows multiple
+	 * filesystems to be running sync work concurrently, and scales with
+	 * the number of CPUs in the system.
+	 */
+	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_syncd_wq)
+		goto out;
+
+	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_ail_wq)
+		goto out_destroy_syncd;
+
+	return 0;
+
+out_destroy_syncd:
+	destroy_workqueue(xfs_syncd_wq);
+out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+	destroy_workqueue(xfs_ail_wq);
+	destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
 init_xfs_fs(void)
 {
 	int			error;
@@ -1813,10 +1763,14 @@ init_xfs_fs(void)
 	if (error)
 		goto out;
 
-	error = xfs_mru_cache_init();
+	error = xfs_init_workqueues();
 	if (error)
 		goto out_destroy_zones;
 
+	error = xfs_mru_cache_init();
+	if (error)
+		goto out_destroy_wq;
+
 	error = xfs_filestream_init();
 	if (error)
 		goto out_mru_cache_uninit;
@@ -1833,6 +1787,10 @@ init_xfs_fs(void)
 	if (error)
 		goto out_cleanup_procfs;
 
+	error = xfs_init_workqueues();
+	if (error)
+		goto out_sysctl_unregister;
+
 	vfs_initquota();
 
 	error = register_filesystem(&xfs_fs_type);
@@ -1850,6 +1808,8 @@ init_xfs_fs(void)
 	xfs_filestream_uninit();
  out_mru_cache_uninit:
 	xfs_mru_cache_uninit();
+ out_destroy_wq:
+	xfs_destroy_workqueues();
  out_destroy_zones:
 	xfs_destroy_zones();
  out:
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void)
 	xfs_buf_terminate();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
+	xfs_destroy_workqueues();
 	xfs_destroy_zones();
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9cf35a688f53..3e898a48122d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
 	xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *),
-	struct completion *completion)
+static void
+xfs_syncd_queue_sync(
+	struct xfs_mount        *mp)
 {
-	struct xfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	work->w_completion = completion;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-	iput(inode);
-}
-
-void
-xfs_flush_inodes(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
+	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+				msecs_to_jiffies(xfs_syncd_centisecs * 10));
 }
 
 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
  */
 STATIC void
 xfs_sync_worker(
-	struct xfs_mount *mp,
-	void		*unused)
+	struct work_struct *work)
 {
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_sync_work);
 	int		error;
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
 			error = xfs_fs_log_dummy(mp);
 		else
 			xfs_log_force(mp, 0);
-		xfs_reclaim_inodes(mp, 0);
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+		/* start pushing all the metadata that is currently dirty */
+		xfs_ail_push_all(mp->m_ail);
 	}
-	mp->m_sync_seq++;
-	wake_up(&mp->m_wait_single_sync_task);
+
+	/* queue us up again */
+	xfs_syncd_queue_sync(mp);
 }
 
-STATIC int
-xfssyncd(
-	void			*arg)
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+	struct xfs_mount        *mp)
 {
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	xfs_sync_work_t		*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		if (list_empty(&mp->m_sync_list))
-			timeleft = schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
 
-		spin_lock(&mp->m_sync_lock);
-		/*
-		 * We can get woken by laptop mode, to do a sync -
-		 * that's the (only!) case where the list would be
-		 * empty with time remaining.
-		 */
-		if (!timeleft || list_empty(&mp->m_sync_list)) {
-			if (!timeleft)
-				timeleft = xfs_syncd_centisecs *
-							msecs_to_jiffies(10);
-			INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-			list_add_tail(&mp->m_sync_work.w_list,
-					&mp->m_sync_list);
-		}
-		list_splice_init(&mp->m_sync_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
+	/*
+	 * We can have inodes enter reclaim after we've shut down the syncd
+	 * workqueue during unmount, so don't allow reclaim work to be queued
+	 * during unmount.
+	 */
+	if (!(mp->m_super->s_flags & MS_ACTIVE))
+		return;
 
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work == &mp->m_sync_work)
-				continue;
-			if (work->w_completion)
-				complete(work->w_completion);
-			kmem_free(work);
-		}
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
 	}
+	rcu_read_unlock();
+}
 
-	return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 }
 
 int
 xfs_syncd_init(
 	struct xfs_mount	*mp)
 {
-	mp->m_sync_work.w_syncer = xfs_sync_worker;
-	mp->m_sync_work.w_mount = mp;
-	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-	if (IS_ERR(mp->m_sync_task))
-		return -PTR_ERR(mp->m_sync_task);
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+	xfs_syncd_queue_sync(mp);
+	xfs_syncd_queue_reclaim(mp);
+
 	return 0;
 }
 
@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
 	struct xfs_mount	*mp)
 {
-	kthread_stop(mp->m_sync_task);
+	cancel_delayed_work_sync(&mp->m_sync_work);
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	cancel_work_sync(&mp->m_flush_work);
 }
 
 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
 				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
 				XFS_ICI_RECLAIM_TAG);
 		spin_unlock(&ip->i_mount->m_perag_lock);
+
+		/* schedule periodic background inode reclaim */
+		xfs_syncd_queue_reclaim(ip->i_mount);
+
 		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
 							-1, _RET_IP_);
 	}
@@ -933,6 +926,7 @@ restart:
 					XFS_LOOKUP_BATCH,
 					XFS_ICI_RECLAIM_TAG);
 			if (!nr_found) {
+				done = 1;
 				rcu_read_unlock();
 				break;
 			}
@@ -1017,7 +1011,13 @@ xfs_reclaim_inodes(
 }
 
 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
  */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1032,15 @@ xfs_reclaim_inode_shrink(
 
 	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
+		/* kick background reclaimer and push the AIL */
+		xfs_syncd_queue_reclaim(mp);
+		xfs_ail_push_all(mp->m_ail);
+
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+					&nr_to_scan);
 		/* terminate if we don't exhaust the scan */
 		if (nr_to_scan > 0)
 			return -1;
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
+extern struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
+
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
 
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 254ee062bd7d..69228aa8605a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -461,12 +461,10 @@ xfs_qm_dqflush_all(
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	int			recl;
 	struct xfs_dquot	*dqp;
-	int			niters;
 	int			error;
 
 	if (!q)
 		return 0;
-	niters = 0;
 again:
 	mutex_lock(&q->qi_dqlist_lock);
 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
@@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs(
 {
 	xfs_buf_t	*bp;
 	int		error;
-	int		notcommitted;
-	int		incr;
 	int		type;
 
 	ASSERT(blkcnt > 0);
-	notcommitted = 0;
-	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
-		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
 	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
 		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
 	error = 0;
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index c9446f1c726d..567b29b9f1b3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -65,11 +65,6 @@ extern kmem_zone_t	*qm_dqtrxzone;
  * block in the dquot/xqm code.
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
-/*
- * When doing a quotacheck, we log dquot clusters of this many FSBs at most
- * in a single transaction. We don't want to ask for too huge a log reservation.
- */
-#define XFS_QM_MAX_DQCLUSTER_LOGSZ	3
 
 typedef xfs_dqhash_t	xfs_dqlist_t;
 
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 0d62a07b7fd8..2dadb15d5ca9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -313,14 +313,12 @@ xfs_qm_scall_quotaon(
 {
 	int		error;
 	uint		qf;
-	uint		accflags;
 	__int64_t	sbflags;
 
 	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
 	/*
 	 * Switching on quota accounting must be done at mount time.
 	 */
-	accflags = flags & XFS_ALL_QUOTA_ACCT;
 	flags &= ~(XFS_ALL_QUOTA_ACCT);
 
 	sbflags = 0;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4bc3c649aee4..27d64d752eab 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2395,17 +2395,33 @@ xfs_free_extent(
 	memset(&args, 0, sizeof(xfs_alloc_arg_t));
 	args.tp = tp;
 	args.mp = tp->t_mountp;
+
+	/*
+	 * validate that the block number is legal - this enables us to detect
+	 * and handle a silent filesystem corruption rather than crashing.
+	 */
 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
-	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+	if (args.agno >= args.mp->m_sb.sb_agcount)
+		return EFSCORRUPTED;
+
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+	if (args.agbno >= args.mp->m_sb.sb_agblocks)
+		return EFSCORRUPTED;
+
 	args.pag = xfs_perag_get(args.mp, args.agno);
-	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
+	ASSERT(args.pag);
+
+	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+	if (error)
 		goto error0;
-#ifdef DEBUG
-	ASSERT(args.agbp != NULL);
-	ASSERT((args.agbno + len) <=
-		be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
-#endif
+
+	/* validate the extent size is legal now we have the agf locked */
+	if (args.agbno + len >
+			be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+		error = EFSCORRUPTED;
+		goto error0;
+	}
+
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 46cc40131d4a..576fdfe81d60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,41 @@ xfs_inode_item_size(
 }
 
 /*
+ * xfs_inode_item_format_extents - convert in-core extents to on-disk form
+ *
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extent as we place them into the on-disk inode. In this case, we
+ * need to do this conversion before we write the extents into the log. Because
+ * we don't have the disk inode to write into here, we allocate a buffer and
+ * format the extents into it via xfs_iextents_copy(). We free the buffer in
+ * the unlock routine after the copy for the log has been made.
+ *
+ * In the case of the data fork, the in-core and on-disk fork sizes can be
+ * different due to delayed allocation extents. We only log on-disk extents
+ * here, so always use the physical fork size to determine the size of the
+ * buffer we need to allocate.
+ */
+STATIC void
+xfs_inode_item_format_extents(
+	struct xfs_inode	*ip,
+	struct xfs_log_iovec	*vecp,
+	int			whichfork,
+	int			type)
+{
+	xfs_bmbt_rec_t		*ext_buffer;
+
+	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
+	if (whichfork == XFS_DATA_FORK)
+		ip->i_itemp->ili_extents_buf = ext_buffer;
+	else
+		ip->i_itemp->ili_aextents_buf = ext_buffer;
+
+	vecp->i_addr = ext_buffer;
+	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
+	vecp->i_type = type;
+}
+
+/*
  * This is called to fill in the vector of log iovecs for the
  * given inode log item.  It fills the first item with an inode
  * log format structure, the second with the on-disk inode structure,
@@ -213,7 +248,6 @@ xfs_inode_item_format(
 	struct xfs_inode	*ip = iip->ili_inode;
 	uint			nvecs;
 	size_t			data_bytes;
-	xfs_bmbt_rec_t		*ext_buffer;
 	xfs_mount_t		*mp;
 
 	vecp->i_addr = &iip->ili_format;
@@ -320,22 +354,8 @@ xfs_inode_item_format(
 			} else
 #endif
 			{
-				/*
-				 * There are delayed allocation extents
-				 * in the inode, or we need to convert
-				 * the extents to on disk format.
-				 * Use xfs_iextents_copy()
-				 * to copy only the real extents into
-				 * a separate buffer.  We'll free the
-				 * buffer in the unlock routine.
-				 */
-				ext_buffer = kmem_alloc(ip->i_df.if_bytes,
-					KM_SLEEP);
-				iip->ili_extents_buf = ext_buffer;
-				vecp->i_addr = ext_buffer;
-				vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-						XFS_DATA_FORK);
-				vecp->i_type = XLOG_REG_TYPE_IEXT;
+				xfs_inode_item_format_extents(ip, vecp,
+					XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
 			}
 			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
 			iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +465,12 @@ xfs_inode_item_format(
 			 */
 			vecp->i_addr = ip->i_afp->if_u1.if_extents;
 			vecp->i_len = ip->i_afp->if_bytes;
+			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 #else
 			ASSERT(iip->ili_aextents_buf == NULL);
-			/*
-			 * Need to endian flip before logging
-			 */
-			ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
-				KM_SLEEP);
-			iip->ili_aextents_buf = ext_buffer;
-			vecp->i_addr = ext_buffer;
-			vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
-					XFS_ATTR_FORK);
+			xfs_inode_item_format_extents(ip, vecp,
+					XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
 #endif
-			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
 			iip->ili_format.ilf_asize = vecp->i_len;
 			vecp++;
 			nvecs++;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index dc1882adaf54..751e94fe1f77 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -204,7 +204,6 @@ xfs_bulkstat(
 	xfs_agi_t		*agi;	/* agi header data */
 	xfs_agino_t		agino;	/* inode # in allocation group */
 	xfs_agnumber_t		agno;	/* allocation group number */
-	xfs_daddr_t		bno;	/* inode cluster start daddr */
 	int			chunkidx; /* current index into inode chunk */
 	int			clustidx; /* current index into inode cluster */
 	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
@@ -463,7 +462,6 @@ xfs_bulkstat(
 						 mp->m_sb.sb_inopblog);
 				}
 				ino = XFS_AGINO_TO_INO(mp, agno, agino);
-				bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
 				/*
 				 * Skip if this inode is free.
 				 */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 25efa9b8a602..b612ce4520ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
 		break;
 	case XLOG_STATE_COVER_NEED:
 	case XLOG_STATE_COVER_NEED2:
-		if (!xfs_trans_ail_tail(log->l_ailp) &&
+		if (!xfs_ail_min_lsn(log->l_ailp) &&
 		    xlog_iclogs_empty(log)) {
 			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
 				log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -801,7 +801,7 @@ xlog_assign_tail_lsn(
 	xfs_lsn_t		tail_lsn;
 	struct log		*log = mp->m_log;
 
-	tail_lsn = xfs_trans_ail_tail(mp->m_ail);
+	tail_lsn = xfs_ail_min_lsn(mp->m_ail);
 	if (!tail_lsn)
 		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
 
@@ -1239,7 +1239,7 @@ xlog_grant_push_ail(
 	 * the filesystem is shutting down.
 	 */
 	if (!XLOG_FORCED_SHUTDOWN(log))
-		xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+		xfs_ail_push(log->l_ailp, threshold_lsn);
 }
 
 /*
@@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr(
 		xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
 }
 
+/*
+ * Check to make sure the grant write head didn't just overlap the tail.  If
+ * the cycles are the same, we can't be overlapping.  Otherwise, make sure that
+ * the cycles differ by exactly one and check the byte count.
+ *
+ * This check is run unlocked, so can give false positives. Rather than assert
+ * on failures, use a warn-once flag and a panic tag to allow the admin to
+ * determine if they want to panic the machine when such an error occurs. For
+ * debug kernels this will have the same effect as using an assert but, unlike
+ * an assert, it can be turned off at runtime.
+ */
 STATIC void
 xlog_verify_grant_tail(
 	struct log	*log)
@@ -3414,17 +3425,22 @@ xlog_verify_grant_tail(
 	int		tail_cycle, tail_blocks;
 	int		cycle, space;
 
-	/*
-	 * Check to make sure the grant write head didn't just over lap the
-	 * tail.  If the cycles are the same, we can't be overlapping.
-	 * Otherwise, make sure that the cycles differ by exactly one and
-	 * check the byte count.
-	 */
 	xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
 	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
 	if (tail_cycle != cycle) {
-		ASSERT(cycle - 1 == tail_cycle);
-		ASSERT(space <= BBTOB(tail_blocks));
+		if (cycle - 1 != tail_cycle &&
+		    !(log->l_flags & XLOG_TAIL_WARN)) {
+			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+				"%s: cycle - 1 != tail_cycle", __func__);
+			log->l_flags |= XLOG_TAIL_WARN;
+		}
+
+		if (space > BBTOB(tail_blocks) &&
+		    !(log->l_flags & XLOG_TAIL_WARN)) {
+			xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+				"%s: space > BBTOB(tail_blocks)", __func__);
+			log->l_flags |= XLOG_TAIL_WARN;
+		}
 	}
 }
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ffae692c9832..5864850e9e34 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i)
 #define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
 #define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
 					   shutdown */
+#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
 
 #ifdef __KERNEL__
 /*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..19af0ab0d0c6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,9 @@ typedef struct xfs_mount {
 	struct mutex		m_icsb_mutex;	/* balancer sync lock */
 #endif
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
-	struct task_struct	*m_sync_task;	/* generalised sync thread */
-	xfs_sync_work_t		m_sync_work;	/* work item for VFS_SYNC */
-	struct list_head	m_sync_list;	/* sync thread work item list */
-	spinlock_t		m_sync_lock;	/* work item list lock */
-	int			m_sync_seq;	/* sync thread generation no. */
-	wait_queue_head_t	m_wait_single_sync_task;
+	struct delayed_work	m_sync_work;	/* background sync work */
+	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
+	struct work_struct	m_flush_work;	/* background inode flush */
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 12aff9584e29..5fc2380092c8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,74 +28,138 @@
 #include "xfs_trans_priv.h"
 #include "xfs_error.h"
 
-STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
-STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
-STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
-STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
+struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */
 
 #ifdef DEBUG
-STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
-#else
+/*
+ * Check that the list is sorted as it should be.
+ */
+STATIC void
+xfs_ail_check(
+	struct xfs_ail	*ailp,
+	xfs_log_item_t	*lip)
+{
+	xfs_log_item_t	*prev_lip;
+
+	if (list_empty(&ailp->xa_ail))
+		return;
+
+	/*
+	 * Check the next and previous entries are valid.
+	 */
+	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
+	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
+	if (&prev_lip->li_ail != &ailp->xa_ail)
+		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
+
+	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
+	if (&prev_lip->li_ail != &ailp->xa_ail)
+		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
+
+
+#ifdef XFS_TRANS_DEBUG
+	/*
+	 * Walk the list checking lsn ordering, and that every entry has the
+	 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
+	 * when specifically debugging the transaction subsystem.
+	 */
+	prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
+	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
+		if (&prev_lip->li_ail != &ailp->xa_ail)
+			ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
+		ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
+		prev_lip = lip;
+	}
+#endif /* XFS_TRANS_DEBUG */
+}
+#else /* !DEBUG */
 #define	xfs_ail_check(a,l)
 #endif /* DEBUG */
 
+/*
+ * Return a pointer to the first item in the AIL.  If the AIL is empty, then
+ * return NULL.
+ */
+static xfs_log_item_t *
+xfs_ail_min(
+	struct xfs_ail  *ailp)
+{
+	if (list_empty(&ailp->xa_ail))
+		return NULL;
+
+	return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
+}
+
+/*
+ * Return a pointer to the last item in the AIL.  If the AIL is empty, then
+ * return NULL.
+ */
+static xfs_log_item_t *
+xfs_ail_max(
+	struct xfs_ail  *ailp)
+{
+	if (list_empty(&ailp->xa_ail))
+		return NULL;
+
+	return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
+}
+
+/*
+ * Return a pointer to the item which follows the given item in the AIL.  If
+ * the given item is the last item in the list, then return NULL.
+ */
+static xfs_log_item_t *
+xfs_ail_next(
+	struct xfs_ail  *ailp,
+	xfs_log_item_t  *lip)
+{
+	if (lip->li_ail.next == &ailp->xa_ail)
+		return NULL;
+
+	return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+}
 
 /*
- * This is called by the log manager code to determine the LSN
- * of the tail of the log.  This is exactly the LSN of the first
- * item in the AIL.  If the AIL is empty, then this function
- * returns 0.
+ * This is called by the log manager code to determine the LSN of the tail of
+ * the log.  This is exactly the LSN of the first item in the AIL.  If the AIL
+ * is empty, then this function returns 0.
  *
- * We need the AIL lock in order to get a coherent read of the
- * lsn of the last item in the AIL.
+ * We need the AIL lock in order to get a coherent read of the lsn of the last
+ * item in the AIL.
  */
 xfs_lsn_t
-xfs_trans_ail_tail(
+xfs_ail_min_lsn(
 	struct xfs_ail	*ailp)
 {
-	xfs_lsn_t	lsn;
+	xfs_lsn_t	lsn = 0;
 	xfs_log_item_t	*lip;
 
 	spin_lock(&ailp->xa_lock);
 	lip = xfs_ail_min(ailp);
-	if (lip == NULL) {
-		lsn = (xfs_lsn_t)0;
-	} else {
+	if (lip)
 		lsn = lip->li_lsn;
-	}
 	spin_unlock(&ailp->xa_lock);
 
 	return lsn;
 }
 
 /*
- * xfs_trans_push_ail
- *
- * This routine is called to move the tail of the AIL forward.  It does this by
- * trying to flush items in the AIL whose lsns are below the given
- * threshold_lsn.
- *
- * the push is run asynchronously in a separate thread, so we return the tail
- * of the log right now instead of the tail after the push. This means we will
- * either continue right away, or we will sleep waiting on the async thread to
- * do its work.
- *
- * We do this unlocked - we only need to know whether there is anything in the
- * AIL at the time we are called. We don't need to access the contents of
- * any of the objects, so the lock is not needed.
+ * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
  */
-void
-xfs_trans_ail_push(
-	struct xfs_ail	*ailp,
-	xfs_lsn_t	threshold_lsn)
+static xfs_lsn_t
+xfs_ail_max_lsn(
+	struct xfs_ail  *ailp)
 {
-	xfs_log_item_t	*lip;
+	xfs_lsn_t       lsn = 0;
+	xfs_log_item_t  *lip;
 
-	lip = xfs_ail_min(ailp);
-	if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
-		if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0)
-			xfsaild_wakeup(ailp, threshold_lsn);
-	}
+	spin_lock(&ailp->xa_lock);
+	lip = xfs_ail_max(ailp);
+	if (lip)
+		lsn = lip->li_lsn;
+	spin_unlock(&ailp->xa_lock);
+
+	return lsn;
 }
 
 /*
@@ -236,35 +300,78 @@ out:
 }
 
 /*
- * xfsaild_push does the work of pushing on the AIL.  Returning a timeout of
- * zero indicates that the caller should sleep until woken.
+ * splice the log item list into the AIL at the given LSN.
  */
-long
-xfsaild_push(
-	struct xfs_ail	*ailp,
-	xfs_lsn_t	*last_lsn)
+static void
+xfs_ail_splice(
+	struct xfs_ail  *ailp,
+	struct list_head *list,
+	xfs_lsn_t       lsn)
 {
-	long		tout = 0;
-	xfs_lsn_t	last_pushed_lsn = *last_lsn;
-	xfs_lsn_t	target =  ailp->xa_target;
-	xfs_lsn_t	lsn;
-	xfs_log_item_t	*lip;
-	int		flush_log, count, stuck;
-	xfs_mount_t	*mp = ailp->xa_mount;
+	xfs_log_item_t  *next_lip;
+
+	/* If the list is empty, just insert the item.  */
+	if (list_empty(&ailp->xa_ail)) {
+		list_splice(list, &ailp->xa_ail);
+		return;
+	}
+
+	list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
+		if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
+			break;
+	}
+
+	ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
+	       XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
+
+	list_splice_init(list, &next_lip->li_ail);
+}
+
+/*
+ * Delete the given item from the AIL.
+ */
+static void
+xfs_ail_delete(
+	struct xfs_ail  *ailp,
+	xfs_log_item_t  *lip)
+{
+	xfs_ail_check(ailp, lip);
+	list_del(&lip->li_ail);
+	xfs_trans_ail_cursor_clear(ailp, lip);
+}
+
+/*
+ * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
+ * to run at a later time if there is more work to do to complete the push.
+ */
+STATIC void
+xfs_ail_worker(
+	struct work_struct	*work)
+{
+	struct xfs_ail		*ailp = container_of(to_delayed_work(work),
+					struct xfs_ail, xa_work);
+	xfs_mount_t		*mp = ailp->xa_mount;
 	struct xfs_ail_cursor	*cur = &ailp->xa_cursors;
-	int		push_xfsbufd = 0;
+	xfs_log_item_t		*lip;
+	xfs_lsn_t		lsn;
+	xfs_lsn_t		target;
+	long			tout = 10;
+	int			flush_log = 0;
+	int			stuck = 0;
+	int			count = 0;
+	int			push_xfsbufd = 0;
 
 	spin_lock(&ailp->xa_lock);
+	target = ailp->xa_target;
 	xfs_trans_ail_cursor_init(ailp, cur);
-	lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn);
+	lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
 	if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
 		/*
 		 * AIL is empty or our push has reached the end.
 		 */
 		xfs_trans_ail_cursor_done(ailp, cur);
 		spin_unlock(&ailp->xa_lock);
-		*last_lsn = 0;
-		return tout;
+		goto out_done;
 	}
 
 	XFS_STATS_INC(xs_push_ail);
@@ -281,8 +388,7 @@ xfsaild_push(
 	 * lots of contention on the AIL lists.
 	 */
 	lsn = lip->li_lsn;
-	flush_log = stuck = count = 0;
-	while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
+	while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
 		int	lock_result;
 		/*
 		 * If we can lock the item without sleeping, unlock the AIL
@@ -301,13 +407,13 @@ xfsaild_push(
 		case XFS_ITEM_SUCCESS:
 			XFS_STATS_INC(xs_push_ail_success);
 			IOP_PUSH(lip);
-			last_pushed_lsn = lsn;
+			ailp->xa_last_pushed_lsn = lsn;
 			break;
 
 		case XFS_ITEM_PUSHBUF:
 			XFS_STATS_INC(xs_push_ail_pushbuf);
 			IOP_PUSHBUF(lip);
-			last_pushed_lsn = lsn;
+			ailp->xa_last_pushed_lsn = lsn;
 			push_xfsbufd = 1;
 			break;
 
@@ -319,7 +425,7 @@ xfsaild_push(
 
 		case XFS_ITEM_LOCKED:
 			XFS_STATS_INC(xs_push_ail_locked);
-			last_pushed_lsn = lsn;
+			ailp->xa_last_pushed_lsn = lsn;
 			stuck++;
 			break;
 
@@ -374,9 +480,27 @@ xfsaild_push(
 		wake_up_process(mp->m_ddev_targp->bt_task);
 	}
 
+	/* assume we have more work to do in a short while */
+out_done:
 	if (!count) {
 		/* We're past our target or empty, so idle */
-		last_pushed_lsn = 0;
+		ailp->xa_last_pushed_lsn = 0;
+
+		/*
+		 * We clear the XFS_AIL_PUSHING_BIT first before checking
+		 * whether the target has changed. If the target has changed,
+		 * this pushes the requeue race directly onto the result of the
+		 * atomic test/set bit, so we are guaranteed that either the
+		 * the pusher that changed the target or ourselves will requeue
+		 * the work (but not both).
+		 */
+		clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
+		smp_rmb();
+		if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
+		    test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
+			return;
+
+		tout = 50;
 	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
 		/*
 		 * We reached the target so wait a bit longer for I/O to
@@ -384,7 +508,7 @@ xfsaild_push(
 		 * start the next scan from the start of the AIL.
 		 */
 		tout = 50;
-		last_pushed_lsn = 0;
+		ailp->xa_last_pushed_lsn = 0;
 	} else if ((stuck * 100) / count > 90) {
 		/*
 		 * Either there is a lot of contention on the AIL or we
@@ -396,14 +520,61 @@ xfsaild_push(
 		 * continuing from where we were.
 		 */
 		tout = 20;
-	} else {
-		/* more to do, but wait a short while before continuing */
-		tout = 10;
 	}
-	*last_lsn = last_pushed_lsn;
-	return tout;
+
+	/* There is more to do, requeue us.  */
+	queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
+					msecs_to_jiffies(tout));
+}
+
+/*
+ * This routine is called to move the tail of the AIL forward.  It does this by
+ * trying to flush items in the AIL whose lsns are below the given
+ * threshold_lsn.
+ *
+ * The push is run asynchronously in a workqueue, which means the caller needs
+ * to handle waiting on the async flush for space to become available.
+ * We don't want to interrupt any push that is in progress, hence we only queue
+ * work if we set the pushing bit appropriately.
+ *
+ * We do this unlocked - we only need to know whether there is anything in the
+ * AIL at the time we are called. We don't need to access the contents of
+ * any of the objects, so the lock is not needed.
+ */
+void
+xfs_ail_push(
+	struct xfs_ail	*ailp,
+	xfs_lsn_t	threshold_lsn)
+{
+	xfs_log_item_t	*lip;
+
+	lip = xfs_ail_min(ailp);
+	if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
+	    XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
+		return;
+
+	/*
+	 * Ensure that the new target is noticed in push code before it clears
+	 * the XFS_AIL_PUSHING_BIT.
+	 */
+	smp_wmb();
+	xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
+	if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
+		queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
 }
 
+/*
+ * Push out all items in the AIL immediately
+ */
+void
+xfs_ail_push_all(
+	struct xfs_ail  *ailp)
+{
+	xfs_lsn_t       threshold_lsn = xfs_ail_max_lsn(ailp);
+
+	if (threshold_lsn)
+		xfs_ail_push(ailp, threshold_lsn);
+}
 
 /*
  * This is to be called when an item is unlocked that may have
@@ -615,7 +786,6 @@ xfs_trans_ail_init(
 	xfs_mount_t	*mp)
 {
 	struct xfs_ail	*ailp;
-	int		error;
 
 	ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
 	if (!ailp)
@@ -624,15 +794,9 @@ xfs_trans_ail_init(
 	ailp->xa_mount = mp;
 	INIT_LIST_HEAD(&ailp->xa_ail);
 	spin_lock_init(&ailp->xa_lock);
-	error = xfsaild_start(ailp);
-	if (error)
-		goto out_free_ailp;
+	INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
 	mp->m_ail = ailp;
 	return 0;
-
-out_free_ailp:
-	kmem_free(ailp);
-	return error;
 }
 
 void
@@ -641,124 +805,6 @@ xfs_trans_ail_destroy(
 {
 	struct xfs_ail	*ailp = mp->m_ail;
 
-	xfsaild_stop(ailp);
+	cancel_delayed_work_sync(&ailp->xa_work);
 	kmem_free(ailp);
 }
-
-/*
- * splice the log item list into the AIL at the given LSN.
- */
-STATIC void
-xfs_ail_splice(
-	struct xfs_ail	*ailp,
-	struct list_head *list,
-	xfs_lsn_t	lsn)
-{
-	xfs_log_item_t	*next_lip;
-
-	/*
-	 * If the list is empty, just insert the item.
-	 */
-	if (list_empty(&ailp->xa_ail)) {
-		list_splice(list, &ailp->xa_ail);
-		return;
-	}
-
-	list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
-		if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
-			break;
-	}
-
-	ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
-	       (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
-
-	list_splice_init(list, &next_lip->li_ail);
-	return;
-}
-
-/*
- * Delete the given item from the AIL.  Return a pointer to the item.
- */
-STATIC void
-xfs_ail_delete(
-	struct xfs_ail	*ailp,
-	xfs_log_item_t	*lip)
-{
-	xfs_ail_check(ailp, lip);
-	list_del(&lip->li_ail);
-	xfs_trans_ail_cursor_clear(ailp, lip);
-}
-
-/*
- * Return a pointer to the first item in the AIL.
- * If the AIL is empty, then return NULL.
- */
-STATIC xfs_log_item_t *
-xfs_ail_min(
-	struct xfs_ail	*ailp)
-{
-	if (list_empty(&ailp->xa_ail))
-		return NULL;
-
-	return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
-}
-
-/*
- * Return a pointer to the item which follows
- * the given item in the AIL.  If the given item
- * is the last item in the list, then return NULL.
- */
-STATIC xfs_log_item_t *
-xfs_ail_next(
-	struct xfs_ail	*ailp,
-	xfs_log_item_t	*lip)
-{
-	if (lip->li_ail.next == &ailp->xa_ail)
-		return NULL;
-
-	return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
-}
-
-#ifdef DEBUG
-/*
- * Check that the list is sorted as it should be.
- */
-STATIC void
-xfs_ail_check(
-	struct xfs_ail	*ailp,
-	xfs_log_item_t	*lip)
-{
-	xfs_log_item_t	*prev_lip;
-
-	if (list_empty(&ailp->xa_ail))
-		return;
-
-	/*
-	 * Check the next and previous entries are valid.
-	 */
-	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
-	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->xa_ail)
-		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
-
-	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->xa_ail)
-		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
-
-
-#ifdef XFS_TRANS_DEBUG
-	/*
-	 * Walk the list checking lsn ordering, and that every entry has the
-	 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
-	 * when specifically debugging the transaction subsystem.
-	 */
-	prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
-	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
-		if (&prev_lip->li_ail != &ailp->xa_ail)
-			ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
-		ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
-		prev_lip = lip;
-	}
-#endif /* XFS_TRANS_DEBUG */
-}
-#endif /* DEBUG */
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35162c238fa3..6b164e9e9a1f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,16 +65,22 @@ struct xfs_ail_cursor {
 struct xfs_ail {
 	struct xfs_mount	*xa_mount;
 	struct list_head	xa_ail;
-	uint			xa_gen;
-	struct task_struct	*xa_task;
 	xfs_lsn_t		xa_target;
 	struct xfs_ail_cursor	xa_cursors;
 	spinlock_t		xa_lock;
+	struct delayed_work	xa_work;
+	xfs_lsn_t		xa_last_pushed_lsn;
+	unsigned long		xa_flags;
 };
 
+#define XFS_AIL_PUSHING_BIT	0
+
 /*
  * From xfs_trans_ail.c
  */
+
+extern struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */
+
 void	xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
 				struct xfs_log_item **log_items, int nr_items,
 				xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -98,12 +104,13 @@ xfs_trans_ail_delete(
 	xfs_trans_ail_delete_bulk(ailp, &lip, 1);
 }
 
-void			xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
+void			xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
+void			xfs_ail_push_all(struct xfs_ail *);
+xfs_lsn_t		xfs_ail_min_lsn(struct xfs_ail *ailp);
+
 void			xfs_trans_unlocked_item(struct xfs_ail *,
 					xfs_log_item_t *);
 
-xfs_lsn_t		xfs_trans_ail_tail(struct xfs_ail *ailp);
-
 struct xfs_log_item	*xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur,
 					xfs_lsn_t lsn);
@@ -112,11 +119,6 @@ struct xfs_log_item	*xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
 void			xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
 					struct xfs_ail_cursor *cur);
 
-long	xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
-void	xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
-int	xfsaild_start(struct xfs_ail *);
-void	xfsaild_stop(struct xfs_ail *);
-
 #if BITS_PER_LONG != 64
 static inline void
 xfs_trans_ail_copy_lsn(
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index bd297a20ab98..077c00d94f6e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -170,6 +170,10 @@
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
 	/* implement dynamic printk debug */				\
+	. = ALIGN(8);                                                   \
+	VMLINUX_SYMBOL(__start___jump_table) = .;                       \
+	*(__jump_table)                                                 \
+	VMLINUX_SYMBOL(__stop___jump_table) = .;                        \
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___verbose) = .;                          \
 	*(__verbose)                                                    \
@@ -228,8 +232,6 @@
 									\
 	BUG_TABLE							\
 									\
-	JUMP_TABLE							\
-									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;		\
@@ -274,70 +276,70 @@
 	/* Kernel symbol table: Normal symbols */			\
 	__ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start___ksymtab) = .;			\
-		*(__ksymtab)						\
+		*(SORT(___ksymtab+*))					\
 		VMLINUX_SYMBOL(__stop___ksymtab) = .;			\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only symbols */			\
 	__ksymtab_gpl     : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start___ksymtab_gpl) = .;		\
-		*(__ksymtab_gpl)					\
+		*(SORT(___ksymtab_gpl+*))				\
 		VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .;		\
 	}								\
 									\
 	/* Kernel symbol table: Normal unused symbols */		\
 	__ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start___ksymtab_unused) = .;		\
-		*(__ksymtab_unused)					\
+		*(SORT(___ksymtab_unused+*))				\
 		VMLINUX_SYMBOL(__stop___ksymtab_unused) = .;		\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only unused symbols */		\
 	__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .;	\
-		*(__ksymtab_unused_gpl)					\
+		*(SORT(___ksymtab_unused_gpl+*))			\
 		VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .;	\
 	}								\
 									\
 	/* Kernel symbol table: GPL-future-only symbols */		\
 	__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .;	\
-		*(__ksymtab_gpl_future)					\
+		*(SORT(___ksymtab_gpl_future+*))			\
 		VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .;	\
 	}								\
 									\
 	/* Kernel symbol table: Normal symbols */			\
 	__kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start___kcrctab) = .;			\
-		*(__kcrctab)						\
+		*(SORT(___kcrctab+*))					\
 		VMLINUX_SYMBOL(__stop___kcrctab) = .;			\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only symbols */			\
 	__kcrctab_gpl     : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start___kcrctab_gpl) = .;		\
-		*(__kcrctab_gpl)					\
+		*(SORT(___kcrctab_gpl+*))				\
 		VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .;		\
 	}								\
 									\
 	/* Kernel symbol table: Normal unused symbols */		\
 	__kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start___kcrctab_unused) = .;		\
-		*(__kcrctab_unused)					\
+		*(SORT(___kcrctab_unused+*))				\
 		VMLINUX_SYMBOL(__stop___kcrctab_unused) = .;		\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only unused symbols */		\
 	__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .;	\
-		*(__kcrctab_unused_gpl)					\
+		*(SORT(___kcrctab_unused_gpl+*))			\
 		VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .;	\
 	}								\
 									\
 	/* Kernel symbol table: GPL-future-only symbols */		\
 	__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
 		VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .;	\
-		*(__kcrctab_gpl_future)					\
+		*(SORT(___kcrctab_gpl_future+*))			\
 		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;	\
 	}								\
 									\
@@ -589,14 +591,6 @@
 #define BUG_TABLE
 #endif
 
-#define JUMP_TABLE							\
-	. = ALIGN(8);							\
-	__jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___jump_table) = .;		\
-		*(__jump_table)						\
-		VMLINUX_SYMBOL(__stop___jump_table) = .;		\
-	}
-
 #ifdef CONFIG_PM_TRACE
 #define TRACEDATA							\
 	. = ALIGN(4);							\
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index aaab875e1a35..6028fb862254 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -13,7 +13,7 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 static void
 xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index f22e7fe4b6db..c99c3d3e7811 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -118,6 +118,7 @@ int drm_fb_helper_setcolreg(unsigned regno,
 			    unsigned transp,
 			    struct fb_info *info);
 
+bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper);
 void drm_fb_helper_restore(void);
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height);
@@ -126,7 +127,7 @@ void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
 
 int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info);
 
-bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
+int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
 bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel);
 int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper);
 int drm_fb_helper_debug_enter(struct fb_info *info);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index c2f93a8ae2e1..564b14aa7e16 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -86,7 +86,7 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 }
 #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \
 						&(mm)->head_node.node_list, \
-						node_list);
+						node_list)
 #define drm_mm_for_each_scanned_node_reverse(entry, n, mm) \
 	for (entry = (mm)->prev_scanned_node, \
 		next = entry ? list_entry(entry->node_list.next, \
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 816e30cbd968..f04b2a3b0f49 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -155,6 +155,7 @@
 	{0x1002, 0x6719, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x671c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x671d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x671f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6720, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6721, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6722, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
@@ -167,6 +168,7 @@
 	{0x1002, 0x6729, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6738, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6739, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x673e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6740, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6741, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
@@ -199,6 +201,7 @@
 	{0x1002, 0x688D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x689b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x689d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x689e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
@@ -209,7 +212,9 @@
 	{0x1002, 0x68b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68b9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68ba, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68bf, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x68c7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 3bce1a4fc305..787f7b6fd622 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -909,6 +909,8 @@ struct drm_radeon_cs {
 #define RADEON_INFO_WANT_CMASK		0x08 /* get access to CMASK on r300 */
 #define RADEON_INFO_CLOCK_CRYSTAL_FREQ	0x09 /* clock crystal frequency */
 #define RADEON_INFO_NUM_BACKENDS	0x0a /* DB/backends for r600+ - need for OQ */
+#define RADEON_INFO_NUM_TILE_PIPES	0x0b /* tile pipes for r600+ */
+#define RADEON_INFO_FUSION_GART_WORKING	0x0c /* fusion writes to GTT were broken before this */
 
 struct drm_radeon_info {
 	uint32_t		request;
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
new file mode 100644
index 000000000000..c5d6095b46f8
--- /dev/null
+++ b/include/linux/alarmtimer.h
@@ -0,0 +1,40 @@
+#ifndef _LINUX_ALARMTIMER_H
+#define _LINUX_ALARMTIMER_H
+
+#include <linux/time.h>
+#include <linux/hrtimer.h>
+#include <linux/timerqueue.h>
+#include <linux/rtc.h>
+
+enum alarmtimer_type {
+	ALARM_REALTIME,
+	ALARM_BOOTTIME,
+
+	ALARM_NUMTYPE,
+};
+
+/**
+ * struct alarm - Alarm timer structure
+ * @node:	timerqueue node for adding to the event list; this value
+ *		also includes the expiration time.
+ * @period:	Period for recurring alarms
+ * @function:	Function pointer to be executed when the timer fires.
+ * @type:	Alarm type (BOOTTIME/REALTIME)
+ * @enabled:	Flag that represents if the alarm is set to fire or not
+ * @data:	Internal data value.
+ */
+struct alarm {
+	struct timerqueue_node	node;
+	ktime_t			period;
+	void			(*function)(struct alarm *);
+	enum alarmtimer_type	type;
+	bool			enabled;
+	void			*data;
+};
+
+void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
+		void (*function)(struct alarm *));
+void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period);
+void alarm_cancel(struct alarm *alarm);
+
+#endif
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index e612575a2596..b4326bfa684f 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -23,11 +23,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr)
 	preempt_disable();
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 	while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
-		while (test_bit(bitnum, addr)) {
-			preempt_enable();
+		preempt_enable();
+		do {
 			cpu_relax();
-			preempt_disable();
-		}
+		} while (test_bit(bitnum, addr));
+		preempt_disable();
 	}
 #endif
 	__acquire(bitlock);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 32176cc8e715..2ad95fa1d130 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -388,20 +388,19 @@ struct request_queue
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
-#define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
-#define QUEUE_FLAG_ELVSWITCH	7	/* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_BIDI		8	/* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES     9	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP   10	/* force complete on same CPU */
-#define QUEUE_FLAG_FAIL_IO     11	/* fake timeout */
-#define QUEUE_FLAG_STACKABLE   12	/* supports request stacking */
-#define QUEUE_FLAG_NONROT      13	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_ELVSWITCH	6	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES     8	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP	9	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     10	/* fake timeout */
+#define QUEUE_FLAG_STACKABLE   11	/* supports request stacking */
+#define QUEUE_FLAG_NONROT      12	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
-#define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES   17	/* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM  18	/* Contributes to random pool */
-#define QUEUE_FLAG_SECDISCARD  19	/* supports SECDISCARD */
+#define QUEUE_FLAG_IO_STAT     13	/* do IO stats */
+#define QUEUE_FLAG_DISCARD     14	/* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   15	/* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM  16	/* Contributes to random pool */
+#define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -697,8 +696,9 @@ extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
+extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
+extern void blk_run_queue_async(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
@@ -857,26 +857,39 @@ extern void blk_put_queue(struct request_queue *);
 struct blk_plug {
 	unsigned long magic;
 	struct list_head list;
+	struct list_head cb_list;
 	unsigned int should_sort;
 };
+struct blk_plug_cb {
+	struct list_head list;
+	void (*callback)(struct blk_plug_cb *);
+};
 
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_finish_plug(struct blk_plug *);
-extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+extern void blk_flush_plug_list(struct blk_plug *, bool);
 
 static inline void blk_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	if (unlikely(plug))
-		__blk_flush_plug(tsk, plug);
+	if (plug)
+		blk_flush_plug_list(plug, false);
+}
+
+static inline void blk_schedule_flush_plug(struct task_struct *tsk)
+{
+	struct blk_plug *plug = tsk->plug;
+
+	if (plug)
+		blk_flush_plug_list(plug, true);
 }
 
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	return plug && !list_empty(&plug->list);
+	return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
 }
 
 /*
@@ -1314,6 +1327,11 @@ static inline void blk_flush_plug(struct task_struct *task)
 {
 }
 
+static inline void blk_schedule_flush_plug(struct task_struct *task)
+{
+}
+
+
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	return false;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b8613e806aa9..01eca1794e14 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_node_nopanic(pgdat, x) \
+	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node_nopanic(pgdat, x) \
diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h
new file mode 100644
index 000000000000..90b1aa867224
--- /dev/null
+++ b/include/linux/bsearch.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_BSEARCH_H
+#define _LINUX_BSEARCH_H
+
+#include <linux/types.h>
+
+void *bsearch(const void *key, const void *base, size_t num, size_t size,
+	      int (*cmp)(const void *key, const void *elt));
+
+#endif /* _LINUX_BSEARCH_H */
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 16ee8b49a200..4554db0cde86 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -355,7 +355,12 @@ struct cpu_vfs_cap_data {
 
 #define CAP_SYSLOG           34
 
-#define CAP_LAST_CAP         CAP_SYSLOG
+/* Allow triggering something that will wake the system */
+
+#define CAP_WAKE_ALARM            35
+
+
+#define CAP_LAST_CAP         CAP_WAKE_ALARM
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
@@ -546,18 +551,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool task_ns_capable(struct task_struct *t, int cap);
-
-/**
- * nsown_capable - Check superior capability to one's own user_ns
- * @cap: The capability in question
- *
- * Return true if the current task has the given superior capability
- * targeted at its own user namespace.
- */
-static inline bool nsown_capable(int cap)
-{
-	return ns_capable(current_user_ns(), cap);
-}
+extern bool nsown_capable(int cap);
 
 /* audit system wants to get cap info from files as well */
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index fc53492b6ad7..d6733e27af34 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -56,46 +56,52 @@ enum clock_event_nofitiers {
 
 /**
  * struct clock_event_device - clock event device descriptor
- * @name:		ptr to clock event name
- * @features:		features
+ * @event_handler:	Assigned by the framework to be called by the low
+ *			level handler of the event source
+ * @set_next_event:	set next event function
+ * @next_event:		local storage for the next event in oneshot mode
  * @max_delta_ns:	maximum delta value in ns
  * @min_delta_ns:	minimum delta value in ns
  * @mult:		nanosecond to cycles multiplier
  * @shift:		nanoseconds to cycles divisor (power of two)
+ * @mode:		operating mode assigned by the management code
+ * @features:		features
+ * @retries:		number of forced programming retries
+ * @set_mode:		set mode function
+ * @broadcast:		function to broadcast events
+ * @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
+ * @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
+ * @name:		ptr to clock event name
  * @rating:		variable to rate clock event devices
  * @irq:		IRQ number (only for non CPU local devices)
  * @cpumask:		cpumask to indicate for which CPUs this device works
- * @set_next_event:	set next event function
- * @set_mode:		set mode function
- * @event_handler:	Assigned by the framework to be called by the low
- *			level handler of the event source
- * @broadcast:		function to broadcast events
  * @list:		list head for the management code
- * @mode:		operating mode assigned by the management code
- * @next_event:		local storage for the next event in oneshot mode
- * @retries:		number of forced programming retries
  */
 struct clock_event_device {
-	const char		*name;
-	unsigned int		features;
+	void			(*event_handler)(struct clock_event_device *);
+	int			(*set_next_event)(unsigned long evt,
+						  struct clock_event_device *);
+	ktime_t			next_event;
 	u64			max_delta_ns;
 	u64			min_delta_ns;
 	u32			mult;
 	u32			shift;
+	enum clock_event_mode	mode;
+	unsigned int		features;
+	unsigned long		retries;
+
+	void			(*broadcast)(const struct cpumask *mask);
+	void			(*set_mode)(enum clock_event_mode mode,
+					    struct clock_event_device *);
+	unsigned long		min_delta_ticks;
+	unsigned long		max_delta_ticks;
+
+	const char		*name;
 	int			rating;
 	int			irq;
 	const struct cpumask	*cpumask;
-	int			(*set_next_event)(unsigned long evt,
-						  struct clock_event_device *);
-	void			(*set_mode)(enum clock_event_mode mode,
-					    struct clock_event_device *);
-	void			(*event_handler)(struct clock_event_device *);
-	void			(*broadcast)(const struct cpumask *mask);
 	struct list_head	list;
-	enum clock_event_mode	mode;
-	ktime_t			next_event;
-	unsigned long		retries;
-};
+} ____cacheline_aligned;
 
 /*
  * Calculate a multiplication factor for scaled math, which is used to convert
@@ -122,6 +128,12 @@ extern u64 clockevent_delta2ns(unsigned long latch,
 			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 
+extern void clockevents_config_and_register(struct clock_event_device *dev,
+					    u32 freq, unsigned long min_delta,
+					    unsigned long max_delta);
+
+extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
+
 extern void clockevents_exchange_device(struct clock_event_device *old,
 					struct clock_event_device *new);
 extern void clockevents_set_mode(struct clock_event_device *dev,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c37b21ad5a3b..c918fbd33ee5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -159,42 +159,38 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  */
 struct clocksource {
 	/*
-	 * First part of structure is read mostly
+	 * Hotpath data, fits in a single cache line when the
+	 * clocksource itself is cacheline aligned.
 	 */
-	char *name;
-	struct list_head list;
-	int rating;
 	cycle_t (*read)(struct clocksource *cs);
-	int (*enable)(struct clocksource *cs);
-	void (*disable)(struct clocksource *cs);
+	cycle_t cycle_last;
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
 	u64 max_idle_ns;
-	unsigned long flags;
-	cycle_t (*vread)(void);
-	void (*suspend)(struct clocksource *cs);
-	void (*resume)(struct clocksource *cs);
+
 #ifdef CONFIG_IA64
 	void *fsys_mmio;        /* used by fsyscall asm code */
 #define CLKSRC_FSYS_MMIO_SET(mmio, addr)      ((mmio) = (addr))
 #else
 #define CLKSRC_FSYS_MMIO_SET(mmio, addr)      do { } while (0)
 #endif
-
-	/*
-	 * Second part is written at each timer interrupt
-	 * Keep it in a different cache line to dirty no
-	 * more than one cache line.
-	 */
-	cycle_t cycle_last ____cacheline_aligned_in_smp;
+	const char *name;
+	struct list_head list;
+	int rating;
+	cycle_t (*vread)(void);
+	int (*enable)(struct clocksource *cs);
+	void (*disable)(struct clocksource *cs);
+	unsigned long flags;
+	void (*suspend)(struct clocksource *cs);
+	void (*resume)(struct clocksource *cs);
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
 	struct list_head wd_list;
 	cycle_t wd_last;
 #endif
-};
+} ____cacheline_aligned;
 
 /*
  * Clock source flags bits::
@@ -341,4 +337,6 @@ static inline void update_vsyscall_tz(void)
 
 extern void timekeeping_notify(struct clocksource *clock);
 
+extern int clocksource_i8253_init(void);
+
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9343dd3de858..11be48e0d168 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2001 Russell King
  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *            
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -56,9 +56,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb,
 #define CPUFREQ_POLICY_POWERSAVE	(1)
 #define CPUFREQ_POLICY_PERFORMANCE	(2)
 
-/* Frequency values here are CPU kHz so that hardware which doesn't run 
- * with some frequencies can complain without having to guess what per 
- * cent / per mille means. 
+/* Frequency values here are CPU kHz so that hardware which doesn't run
+ * with some frequencies can complain without having to guess what per
+ * cent / per mille means.
  * Maximum transition latency is in nanoseconds - if it's unknown,
  * CPUFREQ_ETERNAL shall be used.
  */
@@ -72,13 +72,15 @@ extern struct kobject *cpufreq_global_kobject;
 struct cpufreq_cpuinfo {
 	unsigned int		max_freq;
 	unsigned int		min_freq;
-	unsigned int		transition_latency; /* in 10^(-9) s = nanoseconds */
+
+	/* in 10^(-9) s = nanoseconds */
+	unsigned int		transition_latency;
 };
 
 struct cpufreq_real_policy {
 	unsigned int		min;    /* in kHz */
 	unsigned int		max;    /* in kHz */
-        unsigned int		policy; /* see above */
+	unsigned int		policy; /* see above */
 	struct cpufreq_governor	*governor; /* see below */
 };
 
@@ -94,7 +96,7 @@ struct cpufreq_policy {
 	unsigned int		max;    /* in kHz */
 	unsigned int		cur;    /* in kHz, only needed if cpufreq
 					 * governors are used */
-        unsigned int		policy; /* see above */
+	unsigned int		policy; /* see above */
 	struct cpufreq_governor	*governor; /* see below */
 
 	struct work_struct	update; /* if update_policy() needs to be
@@ -167,11 +169,11 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu
 
 struct cpufreq_governor {
 	char	name[CPUFREQ_NAME_LEN];
-	int 	(*governor)	(struct cpufreq_policy *policy,
+	int	(*governor)	(struct cpufreq_policy *policy,
 				 unsigned int event);
 	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
 					 char *buf);
-	int 	(*store_setspeed)	(struct cpufreq_policy *policy,
+	int	(*store_setspeed)	(struct cpufreq_policy *policy,
 					 unsigned int freq);
 	unsigned int max_transition_latency; /* HW must be able to switch to
 			next freq faster than this value in nano secs or we
@@ -180,7 +182,8 @@ struct cpufreq_governor {
 	struct module		*owner;
 };
 
-/* pass a target to the cpufreq driver 
+/*
+ * Pass a target to the cpufreq driver.
  */
 extern int cpufreq_driver_target(struct cpufreq_policy *policy,
 				 unsigned int target_freq,
@@ -237,9 +240,9 @@ struct cpufreq_driver {
 
 /* flags */
 
-#define CPUFREQ_STICKY		0x01	/* the driver isn't removed even if 
+#define CPUFREQ_STICKY		0x01	/* the driver isn't removed even if
 					 * all ->init() calls failed */
-#define CPUFREQ_CONST_LOOPS 	0x02	/* loops_per_jiffy or other kernel
+#define CPUFREQ_CONST_LOOPS	0x02	/* loops_per_jiffy or other kernel
 					 * "constants" aren't affected by
 					 * frequency transitions */
 #define CPUFREQ_PM_NO_WARN	0x04	/* don't warn on suspend/resume speed
@@ -252,7 +255,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state);
 
 
-static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) 
+static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max)
 {
 	if (policy->min < min)
 		policy->min = min;
@@ -386,34 +389,15 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 /* the following 3 funtions are for cpufreq core use only */
 struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu);
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
-void   cpufreq_cpu_put (struct cpufreq_policy *data);
+void   cpufreq_cpu_put(struct cpufreq_policy *data);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 
-void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, 
+void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 
 
-/*********************************************************************
- *                     UNIFIED DEBUG HELPERS                         *
- *********************************************************************/
-
-#define CPUFREQ_DEBUG_CORE	1
-#define CPUFREQ_DEBUG_DRIVER	2
-#define CPUFREQ_DEBUG_GOVERNOR	4
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-
-extern void cpufreq_debug_printk(unsigned int type, const char *prefix, 
-				 const char *fmt, ...);
-
-#else
-
-#define cpufreq_debug_printk(msg...) do { } while(0)
-
-#endif /* CONFIG_CPU_FREQ_DEBUG */
-
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 9aeeb0ba2003..be16b61283cc 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -146,6 +146,7 @@ struct cred {
 	void		*security;	/* subjective LSM security */
 #endif
 	struct user_struct *user;	/* real user ID subscription */
+	struct user_namespace *user_ns; /* cached user->user_ns */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	struct rcu_head	rcu;		/* RCU deletion hook */
 };
@@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
-#define _current_user_ns()	(current_cred_xxx(user)->user_ns)
 #define current_security()	(current_cred_xxx(security))
 
-extern struct user_namespace *current_user_ns(void);
+#ifdef CONFIG_USER_NS
+#define current_user_ns() (current_cred_xxx(user_ns))
+#else
+extern struct user_namespace init_user_ns;
+#define current_user_ns() (&init_user_ns)
+#endif
+
 
 #define current_uid_gid(_uid, _gid)		\
 do {						\
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f2afed4fa945..19d90a55541d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -197,7 +197,7 @@ struct dentry_operations {
       * typically using d_splice_alias. */
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
-#define DCACHE_UNHASHED		0x0010	
+#define DCACHE_RCUACCESS	0x0010	/* Entry has ever been RCU-visible */
 #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
      /* Parent inode is watched by inotify */
 
@@ -384,7 +384,7 @@ extern struct dentry *dget_parent(struct dentry *dentry);
  
 static inline int d_unhashed(struct dentry *dentry)
 {
-	return (dentry->d_flags & DCACHE_UNHASHED);
+	return hlist_bl_unhashed(&dentry->d_hash);
 }
 
 static inline int d_unlinked(struct dentry *dentry)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e2768834f397..32a4423710f5 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -197,7 +197,6 @@ struct dm_target {
 struct dm_target_callbacks {
 	struct list_head list;
 	int (*congested_fn) (struct dm_target_callbacks *, int);
-	void (*unplug_fn)(struct dm_target_callbacks *);
 };
 
 int dm_register_target(struct target_type *t);
diff --git a/include/linux/device.h b/include/linux/device.h
index ab8dfc095709..c66111affca9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -47,6 +47,38 @@ extern int __must_check bus_create_file(struct bus_type *,
 					struct bus_attribute *);
 extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
 
+/**
+ * struct bus_type - The bus type of the device
+ *
+ * @name:	The name of the bus.
+ * @bus_attrs:	Default attributes of the bus.
+ * @dev_attrs:	Default attributes of the devices on the bus.
+ * @drv_attrs:	Default attributes of the device drivers on the bus.
+ * @match:	Called, perhaps multiple times, whenever a new device or driver
+ *		is added for this bus. It should return a nonzero value if the
+ *		given device can be handled by the given driver.
+ * @uevent:	Called when a device is added, removed, or a few other things
+ *		that generate uevents to add the environment variables.
+ * @probe:	Called when a new device or driver is added to this bus; calls
+ *		the specific driver's probe to initialize the matched device.
+ * @remove:	Called when a device is removed from this bus.
+ * @shutdown:	Called at shut-down time to quiesce the device.
+ * @suspend:	Called when a device on this bus wants to go to sleep mode.
+ * @resume:	Called to bring a device on this bus out of sleep mode.
+ * @pm:		Power management operations of this bus, which call back the
+ *		specific device driver's pm-ops.
+ * @p:		The private data of the driver core, only the driver core can
+ *		touch this.
+ *
+ * A bus is a channel between the processor and one or more devices. For the
+ * purposes of the device model, all devices are connected via a bus, even if
+ * it is an internal, virtual, "platform" bus. Buses can plug into each other.
+ * A USB controller is usually a PCI device, for example. The device model
+ * represents the actual connections between buses and the devices they control.
+ * A bus is represented by the bus_type structure. It contains the name, the
+ * default attributes, the bus' methods, PM operations, and the driver core's
+ * private data.
+ */
 struct bus_type {
 	const char		*name;
 	struct bus_attribute	*bus_attrs;
@@ -119,6 +151,37 @@ extern int bus_unregister_notifier(struct bus_type *bus,
 extern struct kset *bus_get_kset(struct bus_type *bus);
 extern struct klist *bus_get_device_klist(struct bus_type *bus);
 
+/**
+ * struct device_driver - The basic device driver structure
+ * @name:	Name of the device driver.
+ * @bus:	The bus which the device of this driver belongs to.
+ * @owner:	The module owner.
+ * @mod_name:	Used for built-in modules.
+ * @suppress_bind_attrs: Disables bind/unbind via sysfs.
+ * @of_match_table: The open firmware table.
+ * @probe:	Called to query the existence of a specific device,
+ *		whether this driver can work with it, and bind the driver
+ *		to a specific device.
+ * @remove:	Called when the device is removed from the system to
+ *		unbind a device from this driver.
+ * @shutdown:	Called at shut-down time to quiesce the device.
+ * @suspend:	Called to put the device to sleep mode. Usually to a
+ *		low power state.
+ * @resume:	Called to bring a device from sleep mode.
+ * @groups:	Default attributes that get created by the driver core
+ *		automatically.
+ * @pm:		Power management operations of the device which matched
+ *		this driver.
+ * @p:		Driver core's private data, no one other than the driver
+ *		core can touch this.
+ *
+ * The device driver-model tracks all of the drivers known to the system.
+ * The main reason for this tracking is to enable the driver core to match
+ * up drivers with new devices. Once drivers are known objects within the
+ * system, however, a number of other things become possible. Device drivers
+ * can export information and configuration variables that are independent
+ * of any specific device.
+ */
 struct device_driver {
 	const char		*name;
 	struct bus_type		*bus;
@@ -185,8 +248,34 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, void *data,
 				  int (*match)(struct device *dev, void *data));
 
-/*
- * device classes
+/**
+ * struct class - device classes
+ * @name:	Name of the class.
+ * @owner:	The module owner.
+ * @class_attrs: Default attributes of this class.
+ * @dev_attrs:	Default attributes of the devices belonging to the class.
+ * @dev_bin_attrs: Default binary attributes of the devices belonging to the class.
+ * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
+ * @dev_uevent:	Called when a device is added, removed from this class, or a
+ *		few other things that generate uevents to add the environment
+ *		variables.
+ * @devnode:	Callback to provide the devtmpfs.
+ * @class_release: Called to release this class.
+ * @dev_release: Called to release the device.
+ * @suspend:	Used to put the device to sleep mode, usually to a low power
+ *		state.
+ * @resume:	Used to bring the device from the sleep mode.
+ * @ns_type:	Callbacks so sysfs can determine namespaces.
+ * @namespace:	Namespace of the device belonging to this class.
+ * @pm:		The default device power management operations of this class.
+ * @p:		The private data of the driver core, no one other than the
+ *		driver core can touch this.
+ *
+ * A class is a higher-level view of a device that abstracts out low-level
+ * implementation details. Drivers may see a SCSI disk or an ATA disk, but,
+ * at the class level, they are all simply disks. Classes allow user space
+ * to work with devices based on what they do, rather than how they are
+ * connected or how they work.
  */
 struct class {
 	const char		*name;
@@ -401,6 +490,65 @@ struct device_dma_parameters {
 	unsigned long segment_boundary_mask;
 };
 
+/**
+ * struct device - The basic device structure
+ * @parent:	The device's "parent" device, the device to which it is attached.
+ * 		In most cases, a parent device is some sort of bus or host
+ * 		controller. If parent is NULL, the device is a top-level device,
+ * 		which is not usually what you want.
+ * @p:		Holds the private data of the driver core portions of the device.
+ * 		See the comment of the struct device_private for detail.
+ * @kobj:	A top-level, abstract class from which other classes are derived.
+ * @init_name:	Initial name of the device.
+ * @type:	The type of device.
+ * 		This identifies the device type and carries type-specific
+ * 		information.
+ * @mutex:	Mutex to synchronize calls to its driver.
+ * @bus:	Type of bus device is on.
+ * @driver:	Which driver has allocated this device.
+ * @platform_data: Platform data specific to the device.
+ * 		Example: For devices on custom boards, as typical of embedded
+ * 		and SOC based hardware, Linux often uses platform_data to point
+ * 		to board-specific structures describing devices and how they
+ * 		are wired.  That can include what ports are available, chip
+ * 		variants, which GPIO pins act in what additional roles, and so
+ * 		on.  This shrinks the "Board Support Packages" (BSPs) and
+ * 		minimizes board-specific #ifdefs in drivers.
+ * @power:	For device power management.
+ * 		See Documentation/power/devices.txt for details.
+ * @pwr_domain:	Provide callbacks that are executed during system suspend,
+ * 		hibernation, system resume and during runtime PM transitions
+ * 		along with subsystem-level and driver-level callbacks.
+ * @numa_node:	NUMA node this device is close to.
+ * @dma_mask:	Dma mask (if dma'ble device).
+ * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
+ * 		hardware supports 64-bit addresses for consistent allocations
+ * 		such as descriptors.
+ * @dma_parms:	A low level driver may set these to teach IOMMU code about
+ * 		segment limitations.
+ * @dma_pools:	Dma pools (if dma'ble device).
+ * @dma_mem:	Internal for coherent mem override.
+ * @archdata:	For arch-specific additions.
+ * @of_node:	Associated device tree node.
+ * @of_match:	Matching of_device_id from driver.
+ * @devt:	For creating the sysfs "dev".
+ * @devres_lock: Spinlock to protect the resource of the device.
+ * @devres_head: The resources list of the device.
+ * @knode_class: The node used to add the device to the class list.
+ * @class:	The class of the device.
+ * @groups:	Optional attribute groups.
+ * @release:	Callback to free the device after all references have
+ * 		gone away. This should be set by the allocator of the
+ * 		device (i.e. the bus driver that discovered the device).
+ *
+ * At the lowest level, every device in a Linux system is represented by an
+ * instance of struct device. The device structure contains the information
+ * that the device model core needs to model the system. Most subsystems,
+ * however, track additional information about the devices they host. As a
+ * result, it is rare for devices to be represented by bare device structures;
+ * instead, that structure, like kobject structures, is usually embedded within
+ * a higher-level representation of the device.
+ */
 struct device {
 	struct device		*parent;
 
@@ -408,7 +556,7 @@ struct device {
 
 	struct kobject kobj;
 	const char		*init_name; /* initial name of the device */
-	struct device_type	*type;
+	const struct device_type *type;
 
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
@@ -442,7 +590,6 @@ struct device {
 	struct dev_archdata	archdata;
 
 	struct device_node	*of_node; /* associated device tree node */
-	const struct of_device_id *of_match; /* matching of_device_id from driver */
 
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 
@@ -557,7 +704,7 @@ extern int device_move(struct device *dev, struct device *new_parent,
 extern const char *device_get_devnode(struct device *dev,
 				      mode_t *mode, const char **tmp);
 extern void *dev_get_drvdata(const struct device *dev);
-extern void dev_set_drvdata(struct device *dev, void *data);
+extern int dev_set_drvdata(struct device *dev, void *data);
 
 /*
  * Root device objects for grouping under /sys/devices
@@ -611,7 +758,7 @@ extern int (*platform_notify)(struct device *dev);
 extern int (*platform_notify_remove)(struct device *dev);
 
 
-/**
+/*
  * get_device - atomically increment the reference count for the device.
  *
  */
@@ -633,13 +780,6 @@ static inline int devtmpfs_mount(const char *mountpoint) { return 0; }
 /* drivers/base/power/shutdown.c */
 extern void device_shutdown(void);
 
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-/* drivers/base/sys.c */
-extern void sysdev_shutdown(void);
-#else
-static inline void sysdev_shutdown(void) { }
-#endif
-
 /* debugging and troubleshooting/diagnostic helpers. */
 extern const char *dev_driver_string(const struct device *dev);
 
@@ -742,13 +882,17 @@ do {						     \
 #endif
 
 /*
- * dev_WARN() acts like dev_printk(), but with the key difference
+ * dev_WARN*() acts like dev_printk(), but with the key difference
  * of using a WARN/WARN_ON to get the message out, including the
  * file/line information and a backtrace.
  */
 #define dev_WARN(dev, format, arg...) \
 	WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg);
 
+#define dev_WARN_ONCE(dev, condition, format, arg...) \
+	WARN_ONCE(condition, "Device %s\n" format, \
+			dev_driver_string(dev), ## arg)
+
 /* Create alias, so I can be autoloaded. */
 #define MODULE_ALIAS_CHARDEV(major,minor) \
 	MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor))
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 0c9653f11c18..e747ecd48e1c 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -1,8 +1,6 @@
 #ifndef _DYNAMIC_DEBUG_H
 #define _DYNAMIC_DEBUG_H
 
-#include <linux/jump_label.h>
-
 /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
  * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
  * use independent hash functions, to reduce the chance of false positives.
diff --git a/include/linux/fb.h b/include/linux/fb.h
index df728c1c29ed..6a8274877171 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -832,6 +832,7 @@ struct fb_tile_ops {
 #define FBINFO_CAN_FORCE_OUTPUT     0x200000
 
 struct fb_info {
+	atomic_t count;
 	int node;
 	int flags;
 	struct mutex lock;		/* Lock for open/release/ioctl funcs */
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 70e4efabe0fb..ebeb2f3ad068 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -61,7 +61,7 @@ struct flex_array {
 struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 		gfp_t flags);
 int flex_array_prealloc(struct flex_array *fa, unsigned int start,
-		unsigned int end, gfp_t flags);
+		unsigned int nr_elements, gfp_t flags);
 void flex_array_free(struct flex_array *fa);
 void flex_array_free_parts(struct flex_array *fa);
 int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dbd860af0804..cdf9495df204 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -358,7 +358,6 @@ struct inodes_stat_t {
 #define FS_EXTENT_FL			0x00080000 /* Extents */
 #define FS_DIRECTIO_FL			0x00100000 /* Use direct i/o */
 #define FS_NOCOW_FL			0x00800000 /* Do not cow file */
-#define FS_COW_FL			0x02000000 /* Cow file */
 #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ca29e03c1fac..9d88e1cb5dbb 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -29,9 +29,22 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
 
+struct ftrace_hash;
+
+enum {
+	FTRACE_OPS_FL_ENABLED		= 1 << 0,
+	FTRACE_OPS_FL_GLOBAL		= 1 << 1,
+	FTRACE_OPS_FL_DYNAMIC		= 1 << 2,
+};
+
 struct ftrace_ops {
-	ftrace_func_t	  func;
-	struct ftrace_ops *next;
+	ftrace_func_t			func;
+	struct ftrace_ops		*next;
+	unsigned long			flags;
+#ifdef CONFIG_DYNAMIC_FTRACE
+	struct ftrace_hash		*notrace_hash;
+	struct ftrace_hash		*filter_hash;
+#endif
 };
 
 extern int function_trace_stop;
@@ -146,14 +159,13 @@ extern void unregister_ftrace_function_probe_all(char *glob);
 extern int ftrace_text_reserved(void *start, void *end);
 
 enum {
-	FTRACE_FL_FREE		= (1 << 0),
-	FTRACE_FL_FAILED	= (1 << 1),
-	FTRACE_FL_FILTER	= (1 << 2),
-	FTRACE_FL_ENABLED	= (1 << 3),
-	FTRACE_FL_NOTRACE	= (1 << 4),
-	FTRACE_FL_CONVERTED	= (1 << 5),
+	FTRACE_FL_ENABLED	= (1 << 30),
+	FTRACE_FL_FREE		= (1 << 31),
 };
 
+#define FTRACE_FL_MASK		(0x3UL << 30)
+#define FTRACE_REF_MAX		((1 << 30) - 1)
+
 struct dyn_ftrace {
 	union {
 		unsigned long		ip; /* address of mcount call-site */
@@ -167,7 +179,12 @@ struct dyn_ftrace {
 };
 
 int ftrace_force_update(void);
-void ftrace_set_filter(unsigned char *buf, int len, int reset);
+void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
+		       int len, int reset);
+void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
+			int len, int reset);
+void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
+void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 22b32af1b5ec..b5a550a39a70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -37,6 +37,7 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	int			padding;
 };
 
 #define FTRACE_MAX_EVENT						\
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index bfb8f934521e..56d8fc87fbbc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
 void free_pages_exact(void *virt, size_t size);
+/* This is different from alloc_pages_exact_node !!! */
+void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask), 0)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index df29c8fde36b..8847c8c29791 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 unsigned long end,
 					 long adjust_next)
 {
-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+	if (!vma->anon_vma || vma->vm_ops)
 		return;
 	__vma_adjust_trans_huge(vma, start, end, adjust_next);
 }
diff --git a/include/linux/init.h b/include/linux/init.h
index 577671c55153..9146f39cdddf 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -79,29 +79,29 @@
 #define __exitused  __used
 #endif
 
-#define __exit          __section(.exit.text) __exitused __cold
+#define __exit          __section(.exit.text) __exitused __cold notrace
 
 /* Used for HOTPLUG */
-#define __devinit        __section(.devinit.text) __cold
+#define __devinit        __section(.devinit.text) __cold notrace
 #define __devinitdata    __section(.devinit.data)
 #define __devinitconst   __section(.devinit.rodata)
-#define __devexit        __section(.devexit.text) __exitused __cold
+#define __devexit        __section(.devexit.text) __exitused __cold notrace
 #define __devexitdata    __section(.devexit.data)
 #define __devexitconst   __section(.devexit.rodata)
 
 /* Used for HOTPLUG_CPU */
-#define __cpuinit        __section(.cpuinit.text) __cold
+#define __cpuinit        __section(.cpuinit.text) __cold notrace
 #define __cpuinitdata    __section(.cpuinit.data)
 #define __cpuinitconst   __section(.cpuinit.rodata)
-#define __cpuexit        __section(.cpuexit.text) __exitused __cold
+#define __cpuexit        __section(.cpuexit.text) __exitused __cold notrace
 #define __cpuexitdata    __section(.cpuexit.data)
 #define __cpuexitconst   __section(.cpuexit.rodata)
 
 /* Used for MEMORY_HOTPLUG */
-#define __meminit        __section(.meminit.text) __cold
+#define __meminit        __section(.meminit.text) __cold notrace
 #define __meminitdata    __section(.meminit.data)
 #define __meminitconst   __section(.meminit.rodata)
-#define __memexit        __section(.memexit.text) __exitused __cold
+#define __memexit        __section(.memexit.text) __exitused __cold notrace
 #define __memexitdata    __section(.memexit.data)
 #define __memexitconst   __section(.memexit.rodata)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index caa151fbebb7..689496bb6654 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -134,7 +134,6 @@ extern struct cred init_cred;
 	.stack		= &init_thread_info,				\
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= PF_KTHREAD,					\
-	.lock_depth	= -1,						\
 	.prio		= MAX_PRIO-20,					\
 	.static_prio	= MAX_PRIO-20,					\
 	.normal_prio	= MAX_PRIO-20,					\
diff --git a/include/linux/input.h b/include/linux/input.h
index f3a7794a18c4..771d6d85667d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -167,6 +167,7 @@ struct input_keymap_entry {
 #define SYN_REPORT		0
 #define SYN_CONFIG		1
 #define SYN_MT_REPORT		2
+#define SYN_DROPPED		3
 
 /*
  * Keys and buttons
@@ -553,8 +554,8 @@ struct input_keymap_entry {
 #define KEY_DVD			0x185	/* Media Select DVD */
 #define KEY_AUX			0x186
 #define KEY_MP3			0x187
-#define KEY_AUDIO		0x188
-#define KEY_VIDEO		0x189
+#define KEY_AUDIO		0x188	/* AL Audio Browser */
+#define KEY_VIDEO		0x189	/* AL Movie Browser */
 #define KEY_DIRECTORY		0x18a
 #define KEY_LIST		0x18b
 #define KEY_MEMO		0x18c	/* Media Select Messages */
@@ -603,8 +604,9 @@ struct input_keymap_entry {
 #define KEY_FRAMEFORWARD	0x1b5
 #define KEY_CONTEXT_MENU	0x1b6	/* GenDesc - system context menu */
 #define KEY_MEDIA_REPEAT	0x1b7	/* Consumer - transport control */
-#define KEY_10CHANNELSUP        0x1b8   /* 10 channels up (10+) */
-#define KEY_10CHANNELSDOWN      0x1b9   /* 10 channels down (10-) */
+#define KEY_10CHANNELSUP	0x1b8	/* 10 channels up (10+) */
+#define KEY_10CHANNELSDOWN	0x1b9	/* 10 channels down (10-) */
+#define KEY_IMAGES		0x1ba	/* AL Image Browser */
 
 #define KEY_DEL_EOL		0x1c0
 #define KEY_DEL_EOS		0x1c1
diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h
index b3ac06a4435d..318bb82325a6 100644
--- a/include/linux/input/mt.h
+++ b/include/linux/input/mt.h
@@ -48,6 +48,12 @@ static inline void input_mt_slot(struct input_dev *dev, int slot)
 	input_event(dev, EV_ABS, ABS_MT_SLOT, slot);
 }
 
+static inline bool input_is_mt_axis(int axis)
+{
+	return axis == ABS_MT_SLOT ||
+		(axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST);
+}
+
 void input_mt_report_slot_state(struct input_dev *dev,
 				unsigned int tool_type, bool active);
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index bea0ac750712..6c12989839d9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -414,7 +414,6 @@ enum
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
 	HRTIMER_SOFTIRQ,
-	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
 };
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 09a308072f56..8b4538446636 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -53,12 +53,13 @@ typedef	void (*irq_preflow_handler_t)(struct irq_data *data);
  * Bits which can be modified via irq_set/clear/modify_status_flags()
  * IRQ_LEVEL			- Interrupt is level type. Will be also
  *				  updated in the code when the above trigger
- *				  bits are modified via set_irq_type()
+ *				  bits are modified via irq_set_irq_type()
  * IRQ_PER_CPU			- Mark an interrupt PER_CPU. Will protect
  *				  it from affinity setting
  * IRQ_NOPROBE			- Interrupt cannot be probed by autoprobing
  * IRQ_NOREQUEST		- Interrupt cannot be requested via
  *				  request_irq()
+ * IRQ_NOTHREAD			- Interrupt cannot be threaded
  * IRQ_NOAUTOEN			- Interrupt is not automatically enabled in
  *				  request/setup_irq()
  * IRQ_NO_BALANCING		- Interrupt cannot be balanced (affinity set)
@@ -85,6 +86,7 @@ enum {
 	IRQ_NO_BALANCING	= (1 << 13),
 	IRQ_MOVE_PCNTXT		= (1 << 14),
 	IRQ_NESTED_THREAD	= (1 << 15),
+	IRQ_NOTHREAD		= (1 << 16),
 };
 
 #define IRQF_MODIFY_MASK	\
@@ -261,23 +263,6 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d)
  * struct irq_chip - hardware interrupt chip descriptor
  *
  * @name:		name for /proc/interrupts
- * @startup:		deprecated, replaced by irq_startup
- * @shutdown:		deprecated, replaced by irq_shutdown
- * @enable:		deprecated, replaced by irq_enable
- * @disable:		deprecated, replaced by irq_disable
- * @ack:		deprecated, replaced by irq_ack
- * @mask:		deprecated, replaced by irq_mask
- * @mask_ack:		deprecated, replaced by irq_mask_ack
- * @unmask:		deprecated, replaced by irq_unmask
- * @eoi:		deprecated, replaced by irq_eoi
- * @end:		deprecated, will go away with __do_IRQ()
- * @set_affinity:	deprecated, replaced by irq_set_affinity
- * @retrigger:		deprecated, replaced by irq_retrigger
- * @set_type:		deprecated, replaced by irq_set_type
- * @set_wake:		deprecated, replaced by irq_wake
- * @bus_lock:		deprecated, replaced by irq_bus_lock
- * @bus_sync_unlock:	deprecated, replaced by irq_bus_sync_unlock
- *
  * @irq_startup:	start up the interrupt (defaults to ->enable if NULL)
  * @irq_shutdown:	shut down the interrupt (defaults to ->disable if NULL)
  * @irq_enable:		enable the interrupt (defaults to chip->unmask if NULL)
@@ -295,6 +280,9 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d)
  * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
  * @irq_cpu_online:	configure an interrupt source for a secondary CPU
  * @irq_cpu_offline:	un-configure an interrupt source for a secondary CPU
+ * @irq_suspend:	function called from core code on suspend once per chip
+ * @irq_resume:		function called from core code on resume once per chip
+ * @irq_pm_shutdown:	function called from core code on shutdown once per chip
  * @irq_print_chip:	optional to print special chip info in show_interrupts
  * @flags:		chip specific flags
  *
@@ -324,6 +312,10 @@ struct irq_chip {
 	void		(*irq_cpu_online)(struct irq_data *data);
 	void		(*irq_cpu_offline)(struct irq_data *data);
 
+	void		(*irq_suspend)(struct irq_data *data);
+	void		(*irq_resume)(struct irq_data *data);
+	void		(*irq_pm_shutdown)(struct irq_data *data);
+
 	void		(*irq_print_chip)(struct irq_data *data, struct seq_file *p);
 
 	unsigned long	flags;
@@ -439,7 +431,7 @@ irq_set_handler(unsigned int irq, irq_flow_handler_t handle)
 /*
  * Set a highlevel chained flow handler for a given IRQ.
  * (a chained handler is automatically enabled and set to
- *  IRQ_NOREQUEST and IRQ_NOPROBE)
+ *  IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD)
  */
 static inline void
 irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
@@ -469,6 +461,16 @@ static inline void irq_set_probe(unsigned int irq)
 	irq_modify_status(irq, IRQ_NOPROBE, 0);
 }
 
+static inline void irq_set_nothread(unsigned int irq)
+{
+	irq_modify_status(irq, 0, IRQ_NOTHREAD);
+}
+
+static inline void irq_set_thread(unsigned int irq)
+{
+	irq_modify_status(irq, IRQ_NOTHREAD, 0);
+}
+
 static inline void irq_set_nested_thread(unsigned int irq, bool nest)
 {
 	if (nest)
@@ -573,6 +575,145 @@ static inline int irq_reserve_irq(unsigned int irq)
 	return irq_reserve_irqs(irq, 1);
 }
 
+#ifndef irq_reg_writel
+# define irq_reg_writel(val, addr)	writel(val, addr)
+#endif
+#ifndef irq_reg_readl
+# define irq_reg_readl(addr)		readl(addr)
+#endif
+
+/**
+ * struct irq_chip_regs - register offsets for struct irq_chip_generic
+ * @enable:	Enable register offset to reg_base
+ * @disable:	Disable register offset to reg_base
+ * @mask:	Mask register offset to reg_base
+ * @ack:	Ack register offset to reg_base
+ * @eoi:	Eoi register offset to reg_base
+ * @type:	Type configuration register offset to reg_base
+ * @polarity:	Polarity configuration register offset to reg_base
+ */
+struct irq_chip_regs {
+	unsigned long		enable;
+	unsigned long		disable;
+	unsigned long		mask;
+	unsigned long		ack;
+	unsigned long		eoi;
+	unsigned long		type;
+	unsigned long		polarity;
+};
+
+/**
+ * struct irq_chip_type - Generic interrupt chip instance for a flow type
+ * @chip:		The real interrupt chip which provides the callbacks
+ * @regs:		Register offsets for this chip
+ * @handler:		Flow handler associated with this chip
+ * @type:		Chip can handle these flow types
+ *
+ * An irq_chip_generic can have several instances of irq_chip_type when
+ * it requires different functions and register offsets for different
+ * flow types.
+ */
+struct irq_chip_type {
+	struct irq_chip		chip;
+	struct irq_chip_regs	regs;
+	irq_flow_handler_t	handler;
+	u32			type;
+};
+
+/**
+ * struct irq_chip_generic - Generic irq chip data structure
+ * @lock:		Lock to protect register and cache data access
+ * @reg_base:		Register base address (virtual)
+ * @irq_base:		Interrupt base nr for this chip
+ * @irq_cnt:		Number of interrupts handled by this chip
+ * @mask_cache:		Cached mask register
+ * @type_cache:		Cached type register
+ * @polarity_cache:	Cached polarity register
+ * @wake_enabled:	Interrupt can wake up from suspend
+ * @wake_active:	Interrupt is marked as a wakeup from suspend source
+ * @num_ct:		Number of available irq_chip_type instances (usually 1)
+ * @private:		Private data for non generic chip callbacks
+ * @list:		List head for keeping track of instances
+ * @chip_types:		Array of interrupt irq_chip_types
+ *
+ * Note that an irq_chip_generic can have multiple irq_chip_type
+ * implementations which can be associated to a particular irq line of
+ * an irq_chip_generic instance. That allows sharing and protecting
+ * state in an irq_chip_generic instance when we need to implement
+ * different flow mechanisms (level/edge) for it.
+ */
+struct irq_chip_generic {
+	raw_spinlock_t		lock;
+	void __iomem		*reg_base;
+	unsigned int		irq_base;
+	unsigned int		irq_cnt;
+	u32			mask_cache;
+	u32			type_cache;
+	u32			polarity_cache;
+	u32			wake_enabled;
+	u32			wake_active;
+	unsigned int		num_ct;
+	void			*private;
+	struct list_head	list;
+	struct irq_chip_type	chip_types[0];
+};
+
+/**
+ * enum irq_gc_flags - Initialization flags for generic irq chips
+ * @IRQ_GC_INIT_MASK_CACHE:	Initialize the mask_cache by reading mask reg
+ * @IRQ_GC_INIT_NESTED_LOCK:	Set the lock class of the irqs to nested for
+ *				irq chips which need to call irq_set_wake() on
+ *				the parent irq. Usually GPIO implementations
+ */
+enum irq_gc_flags {
+	IRQ_GC_INIT_MASK_CACHE		= 1 << 0,
+	IRQ_GC_INIT_NESTED_LOCK		= 1 << 1,
+};
+
+/* Generic chip callback functions */
+void irq_gc_noop(struct irq_data *d);
+void irq_gc_mask_disable_reg(struct irq_data *d);
+void irq_gc_mask_set_bit(struct irq_data *d);
+void irq_gc_mask_clr_bit(struct irq_data *d);
+void irq_gc_unmask_enable_reg(struct irq_data *d);
+void irq_gc_ack(struct irq_data *d);
+void irq_gc_mask_disable_reg_and_ack(struct irq_data *d);
+void irq_gc_eoi(struct irq_data *d);
+int irq_gc_set_wake(struct irq_data *d, unsigned int on);
+
+/* Setup functions for irq_chip_generic */
+struct irq_chip_generic *
+irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base,
+		       void __iomem *reg_base, irq_flow_handler_t handler);
+void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
+			    enum irq_gc_flags flags, unsigned int clr,
+			    unsigned int set);
+int irq_setup_alt_chip(struct irq_data *d, unsigned int type);
+void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
+			     unsigned int clr, unsigned int set);
+
+static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
+{
+	return container_of(d->chip, struct irq_chip_type, chip);
+}
+/* Mask with the low n bits set (all ones for n >= 32); 1U keeps n == 31 defined */
+#define IRQ_MSK(n) (u32)((n) < 32 ? ((1U << (n)) - 1) : UINT_MAX)
+
+#ifdef CONFIG_SMP
+static inline void irq_gc_lock(struct irq_chip_generic *gc)
+{
+	raw_spin_lock(&gc->lock);
+}
+
+static inline void irq_gc_unlock(struct irq_chip_generic *gc)
+{
+	raw_spin_unlock(&gc->lock);
+}
+#else
+static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
+static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
+#endif
+
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #endif /* !CONFIG_S390 */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a082905b5ebe..2d921b35212c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -16,16 +16,18 @@ struct timer_rand_state;
  * @irq_data:		per irq and chip data passed down to chip functions
  * @timer_rand_state:	pointer to timer rand state struct
  * @kstat_irqs:		irq stats per cpu
- * @handle_irq:		highlevel irq-events handler [if NULL, __do_IRQ()]
+ * @handle_irq:		highlevel irq-events handler
+ * @preflow_handler:	handler called before the flow handler (currently used by sparc)
  * @action:		the irq action chain
  * @status:		status information
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
- * @wake_depth:		enable depth, for multiple set_irq_wake() callers
+ * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
  * @irq_count:		stats field to detect stalled irqs
  * @last_unhandled:	aging timer for unhandled count
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @lock:		locking for SMP
+ * @affinity_hint:	hint to user space for preferred irq affinity
  * @affinity_notify:	context for notification of affinity changes
  * @pending_mask:	pending rebalanced interrupts
  * @threads_oneshot:	bitfield to handle shared oneshot threads
@@ -109,10 +111,7 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de
 	desc->handle_irq(irq, desc);
 }
 
-static inline void generic_handle_irq(unsigned int irq)
-{
-	generic_handle_irq_desc(irq, irq_to_desc(irq));
-}
+int generic_handle_irq(unsigned int irq);
 
 /* Test to see if a driver has successfully requested an irq */
 static inline int irq_has_action(unsigned int irq)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7880f18e4b86..83e745f3ead7 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,20 +1,43 @@
 #ifndef _LINUX_JUMP_LABEL_H
 #define _LINUX_JUMP_LABEL_H
 
+#include <linux/types.h>
+#include <linux/compiler.h>
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+
+struct jump_label_key {
+	atomic_t enabled;
+	struct jump_entry *entries;
+#ifdef CONFIG_MODULES
+	struct jump_label_mod *next;
+#endif
+};
+
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif
 
 enum jump_label_type {
+	JUMP_LABEL_DISABLE = 0,
 	JUMP_LABEL_ENABLE,
-	JUMP_LABEL_DISABLE
 };
 
 struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
+#ifdef CONFIG_MODULES
+#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL}
+#else
+#define JUMP_LABEL_INIT {{ 0 }, NULL}
+#endif
+
+static __always_inline bool static_branch(struct jump_label_key *key)
+{
+	return arch_static_branch(key);
+}
+
 extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
@@ -23,37 +46,37 @@ extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
-extern void jump_label_update(unsigned long key, enum jump_label_type type);
-extern void jump_label_apply_nops(struct module *mod);
 extern int jump_label_text_reserved(void *start, void *end);
+extern void jump_label_inc(struct jump_label_key *key);
+extern void jump_label_dec(struct jump_label_key *key);
+extern bool jump_label_enabled(struct jump_label_key *key);
+extern void jump_label_apply_nops(struct module *mod);
 
-#define jump_label_enable(key) \
-	jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
+#else
 
-#define jump_label_disable(key) \
-	jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
+#include <asm/atomic.h>
 
-#else
+#define JUMP_LABEL_INIT {ATOMIC_INIT(0)}
 
-#define JUMP_LABEL(key, label)			\
-do {						\
-	if (unlikely(*key))			\
-		goto label;			\
-} while (0)
+struct jump_label_key {
+	atomic_t enabled;
+};
 
-#define jump_label_enable(cond_var)	\
-do {					\
-       *(cond_var) = 1;			\
-} while (0)
+static __always_inline bool static_branch(struct jump_label_key *key)
+{
+	if (unlikely(atomic_read(&key->enabled)))
+		return true;
+	return false;
+}
 
-#define jump_label_disable(cond_var)	\
-do {					\
-       *(cond_var) = 0;			\
-} while (0)
+static inline void jump_label_inc(struct jump_label_key *key)
+{
+	atomic_inc(&key->enabled);
+}
 
-static inline int jump_label_apply_nops(struct module *mod)
+static inline void jump_label_dec(struct jump_label_key *key)
 {
-	return 0;
+	atomic_dec(&key->enabled);
 }
 
 static inline int jump_label_text_reserved(void *start, void *end)
@@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end)
 static inline void jump_label_lock(void) {}
 static inline void jump_label_unlock(void) {}
 
-#endif
+static inline bool jump_label_enabled(struct jump_label_key *key)
+{
+	return !!atomic_read(&key->enabled);
+}
 
-#define COND_STMT(key, stmt)					\
-do {								\
-	__label__ jl_enabled;					\
-	JUMP_LABEL(key, jl_enabled);				\
-	if (0) {						\
-jl_enabled:							\
-		stmt;						\
-	}							\
-} while (0)
+static inline int jump_label_apply_nops(struct module *mod)
+{
+	return 0;
+}
+
+#endif
 
 #endif
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h
deleted file mode 100644
index e5d012ad92c6..000000000000
--- a/include/linux/jump_label_ref.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _LINUX_JUMP_LABEL_REF_H
-#define _LINUX_JUMP_LABEL_REF_H
-
-#include <linux/jump_label.h>
-#include <asm/atomic.h>
-
-#ifdef HAVE_JUMP_LABEL
-
-static inline void jump_label_inc(atomic_t *key)
-{
-	if (atomic_add_return(1, key) == 1)
-		jump_label_enable(key);
-}
-
-static inline void jump_label_dec(atomic_t *key)
-{
-	if (atomic_dec_and_test(key))
-		jump_label_disable(key);
-}
-
-#else /* !HAVE_JUMP_LABEL */
-
-static inline void jump_label_inc(atomic_t *key)
-{
-	atomic_inc(key);
-}
-
-static inline void jump_label_dec(atomic_t *key)
-{
-	atomic_dec(key);
-}
-
-#undef JUMP_LABEL
-#define JUMP_LABEL(key, label)						\
-do {									\
-	if (unlikely(__builtin_choose_expr(				\
-	      __builtin_types_compatible_p(typeof(key), atomic_t *),	\
-	      atomic_read((atomic_t *)(key)), *(key))))			\
-		goto label;						\
-} while (0)
-
-#endif /* HAVE_JUMP_LABEL */
-
-#endif /* _LINUX_JUMP_LABEL_REF_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 00cec4dc0ae2..f37ba716ef8b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
 
 extern int core_kernel_text(unsigned long addr);
+extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
 extern int func_ptr_is_kernel_text(void *ptr);
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 6efd7a78de6a..310231823852 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -113,5 +113,6 @@ extern void usermodehelper_init(void);
 
 extern int usermodehelper_disable(void);
 extern void usermodehelper_enable(void);
+extern bool usermodehelper_is_disabled(void);
 
 #endif /* __LINUX_KMOD_H__ */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7f675aa81d87..04f32a3eb26b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -137,8 +137,6 @@ enum {
 	ATA_DFLAG_ACPI_PENDING	= (1 << 5), /* ACPI resume action pending */
 	ATA_DFLAG_ACPI_FAILED	= (1 << 6), /* ACPI on devcfg has failed */
 	ATA_DFLAG_AN		= (1 << 7), /* AN configured */
-	ATA_DFLAG_HIPM		= (1 << 8), /* device supports HIPM */
-	ATA_DFLAG_DIPM		= (1 << 9), /* device supports DIPM */
 	ATA_DFLAG_DMADIR	= (1 << 10), /* device requires DMADIR */
 	ATA_DFLAG_CFG_MASK	= (1 << 12) - 1,
 
@@ -198,6 +196,7 @@ enum {
 					      * management */
 	ATA_FLAG_SW_ACTIVITY	= (1 << 22), /* driver supports sw activity
 					      * led */
+	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
diff --git a/include/linux/list.h b/include/linux/list.h
index 3a54266a1e85..cc6d2aa6b415 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -4,7 +4,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/poison.h>
-#include <linux/prefetch.h>
+#include <linux/const.h>
 
 /*
  * Simple doubly linked list implementation.
@@ -367,18 +367,15 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:	the head for your list.
  */
 #define list_for_each(pos, head) \
-	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
-        	pos = pos->next)
+	for (pos = (head)->next; pos != (head); pos = pos->next)
 
 /**
  * __list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop cursor.
  * @head:	the head for your list.
  *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
+ * This variant doesn't differ from list_for_each() any more.
+ * We don't do prefetching in either case.
  */
 #define __list_for_each(pos, head) \
 	for (pos = (head)->next; pos != (head); pos = pos->next)
@@ -389,8 +386,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:	the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
-        	pos = pos->prev)
+	for (pos = (head)->prev; pos != (head); pos = pos->prev)
 
 /**
  * list_for_each_safe - iterate over a list safe against removal of list entry
@@ -410,7 +406,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_prev_safe(pos, n, head) \
 	for (pos = (head)->prev, n = pos->prev; \
-	     prefetch(pos->prev), pos != (head); \
+	     pos != (head); \
 	     pos = n, n = pos->prev)
 
 /**
@@ -421,7 +417,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry(pos, head, member)				\
 	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head); 	\
+	     &pos->member != (head); 	\
 	     pos = list_entry(pos->member.next, typeof(*pos), member))
 
 /**
@@ -432,7 +428,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_reverse(pos, head, member)			\
 	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head); 	\
+	     &pos->member != (head); 	\
 	     pos = list_entry(pos->member.prev, typeof(*pos), member))
 
 /**
@@ -457,7 +453,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_continue(pos, head, member) 		\
 	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head);	\
+	     &pos->member != (head);	\
 	     pos = list_entry(pos->member.next, typeof(*pos), member))
 
 /**
@@ -471,7 +467,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_continue_reverse(pos, head, member)		\
 	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head);	\
+	     &pos->member != (head);	\
 	     pos = list_entry(pos->member.prev, typeof(*pos), member))
 
 /**
@@ -483,7 +479,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * Iterate over list of given type, continuing from current position.
  */
 #define list_for_each_entry_from(pos, head, member) 			\
-	for (; prefetch(pos->member.next), &pos->member != (head);	\
+	for (; &pos->member != (head);	\
 	     pos = list_entry(pos->member.next, typeof(*pos), member))
 
 /**
@@ -664,8 +660,7 @@ static inline void hlist_move_list(struct hlist_head *old,
 #define hlist_entry(ptr, type, member) container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
-	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
-	     pos = pos->next)
+	for (pos = (head)->first; pos ; pos = pos->next)
 
 #define hlist_for_each_safe(pos, n, head) \
 	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
@@ -680,7 +675,7 @@ static inline void hlist_move_list(struct hlist_head *old,
  */
 #define hlist_for_each_entry(tpos, pos, head, member)			 \
 	for (pos = (head)->first;					 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+	     pos &&							 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
 	     pos = pos->next)
 
@@ -692,7 +687,7 @@ static inline void hlist_move_list(struct hlist_head *old,
  */
 #define hlist_for_each_entry_continue(tpos, pos, member)		 \
 	for (pos = (pos)->next;						 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+	     pos &&							 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
 	     pos = pos->next)
 
@@ -703,7 +698,7 @@ static inline void hlist_move_list(struct hlist_head *old,
  * @member:	the name of the hlist_node within the struct.
  */
 #define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
+	for (; pos &&							 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
 	     pos = pos->next)
 
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 5bad17d1acde..31f9d75adc5b 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -2,6 +2,7 @@
 #define _LINUX_LIST_BL_H
 
 #include <linux/list.h>
+#include <linux/bit_spinlock.h>
 
 /*
  * Special version of lists, where head of the list has a lock in the lowest
@@ -114,6 +115,16 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n)
 	}
 }
 
+static inline void hlist_bl_lock(struct hlist_bl_head *b)
+{
+	bit_spin_lock(0, (unsigned long *)b);
+}
+
+static inline void hlist_bl_unlock(struct hlist_bl_head *b)
+{
+	__bit_spin_unlock(0, (unsigned long *)b);
+}
+
 /**
  * hlist_bl_for_each_entry	- iterate over list of given type
  * @tpos:	the type * to use as a loop cursor.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5a5ce7055839..5e9840f50980 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -216,7 +216,7 @@ static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
 	return ;
 }
 
-static inline inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
+static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
 {
 	return ;
 }
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index ad1b19aa6508..aef23309a742 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -86,16 +86,25 @@ extern int mfd_clone_cell(const char *cell, const char **clones,
  */
 static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
 {
-	return pdev->dev.platform_data;
+	return pdev->mfd_cell;
 }
 
 /*
  * Given a platform device that's been created by mfd_add_devices(), fetch
  * the .mfd_data entry from the mfd_cell that created it.
+ * If the device has no mfd_cell, just return its platform_data pointer.
+ * This maintains compatibility with platform drivers whose devices aren't
+ * created by the mfd layer, and expect platform_data to contain what would've
+ * otherwise been in mfd_data.
  */
 static inline void *mfd_get_data(struct platform_device *pdev)
 {
-	return mfd_get_cell(pdev)->mfd_data;
+	const struct mfd_cell *cell = mfd_get_cell(pdev);
+
+	if (cell)
+		return cell->mfd_data;
+	else
+		return pdev->dev.platform_data;
 }
 
 extern int mfd_add_devices(struct device *parent, int id,
diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h
index afe4db49402d..632d1567a1b6 100644
--- a/include/linux/mfd/wm831x/pdata.h
+++ b/include/linux/mfd/wm831x/pdata.h
@@ -81,7 +81,9 @@ struct wm831x_touch_pdata {
 	int rpu;               /** Pen down sensitivity resistor divider */
 	int pressure;          /** Report pressure (boolean) */
 	unsigned int data_irq; /** Touch data ready IRQ */
+	int data_irqf;         /** IRQ flags for data ready IRQ */
 	unsigned int pd_irq;   /** Touch pendown detect IRQ */
+	int pd_irqf;           /** IRQ flags for pen down IRQ */
 };
 
 enum wm831x_watchdog_action {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 692dbae6ffa7..6507dde38b16 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
 /*
- * special vmas that are non-mergable, non-mlock()able
+ * Special vmas that are non-mergeable, non-mlock()able.
+ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
 
@@ -1010,11 +1011,33 @@ int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
 
 /* Is the vma a continuation of the stack vma above it? */
-static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
+static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
 {
 	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
 }
 
+static inline int stack_guard_page_start(struct vm_area_struct *vma,
+					     unsigned long addr)
+{
+	return (vma->vm_flags & VM_GROWSDOWN) &&
+		(vma->vm_start == addr) &&
+		!vma_growsdown(vma->vm_prev, addr);
+}
+
+/* Is the vma a continuation of the stack vma below it? */
+static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+{
+	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+}
+
+static inline int stack_guard_page_end(struct vm_area_struct *vma,
+					   unsigned long addr)
+{
+	return (vma->vm_flags & VM_GROWSUP) &&
+		(vma->vm_end == addr) &&
+		!vma_growsup(vma->vm_next, addr);
+}
+
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len);
diff --git a/include/linux/module.h b/include/linux/module.h
index 5de42043dff0..d9ca2d5dc6d0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -64,6 +64,9 @@ struct module_version_attribute {
 	const char *version;
 } __attribute__ ((__aligned__(sizeof(void *))));
 
+extern ssize_t __modver_version_show(struct module_attribute *,
+				     struct module *, char *);
+
 struct module_kobject
 {
 	struct kobject kobj;
@@ -172,12 +175,7 @@ extern struct module __this_module;
 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
 #else
 #define MODULE_VERSION(_version)					\
-	extern ssize_t __modver_version_show(struct module_attribute *,	\
-					     struct module *, char *);	\
-	static struct module_version_attribute __modver_version_attr	\
-	__used								\
-    __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \
-	= {								\
+	static struct module_version_attribute ___modver_attr = {	\
 		.mattr	= {						\
 			.attr	= {					\
 				.name	= "version",			\
@@ -187,7 +185,10 @@ extern struct module __this_module;
 		},							\
 		.module_name	= KBUILD_MODNAME,			\
 		.version	= _version,				\
-	}
+	};								\
+	static const struct module_version_attribute			\
+	__used __attribute__ ((__section__ ("__modver")))		\
+	* __moduleparam_const __modver_attr = &___modver_attr
 #endif
 
 /* Optional firmware file (or files) needed by the module
@@ -223,7 +224,7 @@ struct module_use {
 	extern void *__crc_##sym __attribute__((weak));		\
 	static const unsigned long __kcrctab_##sym		\
 	__used							\
-	__attribute__((section("__kcrctab" sec), unused))	\
+	__attribute__((section("___kcrctab" sec "+" #sym), unused))	\
 	= (unsigned long) &__crc_##sym;
 #else
 #define __CRC_SYMBOL(sym, sec)
@@ -238,7 +239,7 @@ struct module_use {
 	= MODULE_SYMBOL_PREFIX #sym;                    	\
 	static const struct kernel_symbol __ksymtab_##sym	\
 	__used							\
-	__attribute__((section("__ksymtab" sec), unused))	\
+	__attribute__((section("___ksymtab" sec "+" #sym), unused))	\
 	= { (unsigned long)&sym, __kstrtab_##sym }
 
 #define EXPORT_SYMBOL(sym)					\
@@ -367,34 +368,35 @@ struct module
 	struct module_notes_attrs *notes_attrs;
 #endif
 
+	/* The command line arguments (may be mangled).  People like
+	   keeping pointers to this stuff */
+	char *args;
+
 #ifdef CONFIG_SMP
 	/* Per-cpu data. */
 	void __percpu *percpu;
 	unsigned int percpu_size;
 #endif
 
-	/* The command line arguments (may be mangled).  People like
-	   keeping pointers to this stuff */
-	char *args;
 #ifdef CONFIG_TRACEPOINTS
-	struct tracepoint * const *tracepoints_ptrs;
 	unsigned int num_tracepoints;
+	struct tracepoint * const *tracepoints_ptrs;
 #endif
 #ifdef HAVE_JUMP_LABEL
 	struct jump_entry *jump_entries;
 	unsigned int num_jump_entries;
 #endif
 #ifdef CONFIG_TRACING
-	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
+	const char **trace_bprintk_fmt_start;
 #endif
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call **trace_events;
 	unsigned int num_trace_events;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
-	unsigned long *ftrace_callsites;
 	unsigned int num_ftrace_callsites;
+	unsigned long *ftrace_callsites;
 #endif
 
 #ifdef CONFIG_MODULE_UNLOAD
@@ -475,8 +477,9 @@ const struct kernel_symbol *find_symbol(const char *name,
 					bool warn);
 
 /* Walk the exported symbol table */
-bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
-			    unsigned int symnum, void *data), void *data);
+bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
+				    struct module *owner,
+				    void *data), void *data);
 
 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
    symnum out of range. */
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 07b41951e3fa..ddaae98c53f9 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -67,9 +67,9 @@ struct kparam_string {
 struct kparam_array
 {
 	unsigned int max;
+	unsigned int elemsize;
 	unsigned int *num;
 	const struct kernel_param_ops *ops;
-	unsigned int elemsize;
 	void *elem;
 };
 
@@ -371,8 +371,9 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
  */
 #define module_param_array_named(name, array, type, nump, perm)		\
 	static const struct kparam_array __param_arr_##name		\
-	= { ARRAY_SIZE(array), nump, &param_ops_##type,			\
-	    sizeof(array[0]), array };					\
+	= { .max = ARRAY_SIZE(array), .num = nump,                      \
+	    .ops = &param_ops_##type,					\
+	    .elemsize = sizeof(array[0]), .elem = array };		\
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_array_ops,				\
 			    .arr = &__param_arr_##name,			\
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 94b48bd40dd7..c75471db576e 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -51,7 +51,7 @@ struct mutex {
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
 #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
-	struct thread_info	*owner;
+	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char 		*name;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 216cea5db0aa..87694ca86914 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -47,6 +47,7 @@ struct nfs_client {
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;	/* Clientid verifier */
 	unsigned long		cl_state;
 
 	spinlock_t		cl_lock;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 78b101e487ea..7e371f7df9c4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -50,6 +50,7 @@ struct nfs_fattr {
 	} du;
 	struct nfs_fsid		fsid;
 	__u64			fileid;
+	__u64			mounted_on_fileid;
 	struct timespec		atime;
 	struct timespec		mtime;
 	struct timespec		ctime;
@@ -83,6 +84,7 @@ struct nfs_fattr {
 #define NFS_ATTR_FATTR_PRECHANGE	(1U << 18)
 #define NFS_ATTR_FATTR_V4_REFERRAL	(1U << 19)	/* NFSv4 referral */
 #define NFS_ATTR_FATTR_MOUNTPOINT	(1U << 20)	/* Treat as mountpoint */
+#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID		(1U << 21)
 
 #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
 		| NFS_ATTR_FATTR_MODE \
@@ -231,6 +233,7 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct pnfs_layout_segment **lsegpp;
+	gfp_t gfp_flags;
 };
 
 struct nfs4_getdeviceinfo_args {
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 8bfe6c1d4365..ae5638480ef2 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -21,8 +21,7 @@ extern void of_device_make_bus_id(struct device *dev);
 static inline int of_driver_match_device(struct device *dev,
 					 const struct device_driver *drv)
 {
-	dev->of_match = of_match_device(drv->of_match_table, dev);
-	return dev->of_match != NULL;
+	return of_match_device(drv->of_match_table, dev) != NULL;
 }
 
 extern struct platform_device *of_dev_get(struct platform_device *dev);
@@ -58,6 +57,11 @@ static inline int of_device_uevent(struct device *dev,
 
 static inline void of_device_node_put(struct device *dev) { }
 
+static inline const struct of_device_id *of_match_device(
+		const struct of_device_id *matches, const struct device *dev)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 109e013b1772..e6955f5d1f08 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -68,6 +68,7 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
 
 #endif /* CONFIG_OF_IRQ */
 #endif /* CONFIG_OF */
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
new file mode 100644
index 000000000000..655824fa4c76
--- /dev/null
+++ b/include/linux/pci-ats.h
@@ -0,0 +1,54 @@
+#ifndef LINUX_PCI_ATS_H
+#define LINUX_PCI_ATS_H
+
+#include <linux/pci.h>	/* struct pci_dev, dereferenced in pci_ats_enabled() */
+
+/* Address Translation Service */
+struct pci_ats {
+	int pos;        /* capability position */
+	int stu;        /* Smallest Translation Unit */
+	int qdep;       /* Invalidate Queue Depth */
+	int ref_cnt;    /* Physical Function reference count */
+	unsigned int is_enabled:1;      /* Enable bit is set */
+};
+
+#ifdef CONFIG_PCI_IOV
+
+extern int pci_enable_ats(struct pci_dev *dev, int ps);
+extern void pci_disable_ats(struct pci_dev *dev);
+extern int pci_ats_queue_depth(struct pci_dev *dev);
+/**
+ * pci_ats_enabled - query the ATS status
+ * @dev: the PCI device
+ *
+ * Returns 1 if ATS capability is enabled, or 0 if not.
+ */
+static inline int pci_ats_enabled(struct pci_dev *dev)
+{
+	return dev->ats && dev->ats->is_enabled;
+}
+
+#else /* CONFIG_PCI_IOV */
+
+static inline int pci_enable_ats(struct pci_dev *dev, int ps)
+{
+	return -ENODEV;
+}
+
+static inline void pci_disable_ats(struct pci_dev *dev)
+{
+}
+
+static inline int pci_ats_queue_depth(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+
+static inline int pci_ats_enabled(struct pci_dev *dev)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+
+#endif /* LINUX_PCI_ATS_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4e2c9150a785..8abe8d78c4bf 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2477,15 +2477,12 @@
 #define PCI_DEVICE_ID_INTEL_82840_HB	0x1a21
 #define PCI_DEVICE_ID_INTEL_82845_HB	0x1a30
 #define PCI_DEVICE_ID_INTEL_IOAT	0x1a38
-#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS	0x1c22
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN	0x1c41
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX	0x1c5f
-#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS	0x1d22
 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0	0x1d40
 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1	0x1d41
 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN	0x2310
 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX	0x231f
-#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS	0x2330
 #define PCI_DEVICE_ID_INTEL_82801AA_0	0x2410
 #define PCI_DEVICE_ID_INTEL_82801AA_1	0x2411
 #define PCI_DEVICE_ID_INTEL_82801AA_3	0x2413
@@ -2696,7 +2693,6 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN	0x3b00
 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX	0x3b1f
-#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS	0x3b30
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3a5c4449fd36..8b97308e65df 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -948,7 +948,7 @@ do {									\
 	irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
 # endif
 # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)	\
-	__pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+	__pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
 #endif
 
 #endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee9f1e782800..3412684ce5d5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -2,8 +2,8 @@
  * Performance events:
  *
  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
- *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
- *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
  *
  * Data type definitions, declarations, prototypes.
  *
@@ -52,6 +52,8 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
@@ -468,9 +470,9 @@ enum perf_callchain_context {
 	PERF_CONTEXT_MAX		= (__u64)-4095,
 };
 
-#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
-#define PERF_FLAG_FD_OUTPUT	(1U << 1)
-#define PERF_FLAG_PID_CGROUP	(1U << 2) /* pid=cgroup id, per-cpu mode only */
+#define PERF_FLAG_FD_NO_GROUP		(1U << 0)
+#define PERF_FLAG_FD_OUTPUT		(1U << 1)
+#define PERF_FLAG_PID_CGROUP		(1U << 2) /* pid=cgroup id, per-cpu mode only */
 
 #ifdef __KERNEL__
 /*
@@ -484,9 +486,9 @@ enum perf_callchain_context {
 #endif
 
 struct perf_guest_info_callbacks {
-	int (*is_in_guest) (void);
-	int (*is_user_mode) (void);
-	unsigned long (*get_guest_ip) (void);
+	int				(*is_in_guest)(void);
+	int				(*is_user_mode)(void);
+	unsigned long			(*get_guest_ip)(void);
 };
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -505,7 +507,7 @@ struct perf_guest_info_callbacks {
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/irq_work.h>
-#include <linux/jump_label_ref.h>
+#include <linux/jump_label.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
 
@@ -652,19 +654,19 @@ struct pmu {
 	 * Start the transaction, after this ->add() doesn't need to
 	 * do schedulability tests.
 	 */
-	void (*start_txn)	(struct pmu *pmu); /* optional */
+	void (*start_txn)		(struct pmu *pmu); /* optional */
 	/*
 	 * If ->start_txn() disabled the ->add() schedulability test
 	 * then ->commit_txn() is required to perform one. On success
 	 * the transaction is closed. On error the transaction is kept
 	 * open until ->cancel_txn() is called.
 	 */
-	int  (*commit_txn)	(struct pmu *pmu); /* optional */
+	int  (*commit_txn)		(struct pmu *pmu); /* optional */
 	/*
 	 * Will cancel the transaction, assumes ->del() is called
 	 * for each successful ->add() during the transaction.
 	 */
-	void (*cancel_txn)	(struct pmu *pmu); /* optional */
+	void (*cancel_txn)		(struct pmu *pmu); /* optional */
 };
 
 /**
@@ -712,15 +714,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
 					struct pt_regs *regs);
 
 enum perf_group_flag {
-	PERF_GROUP_SOFTWARE = 0x1,
+	PERF_GROUP_SOFTWARE		= 0x1,
 };
 
-#define SWEVENT_HLIST_BITS	8
-#define SWEVENT_HLIST_SIZE	(1 << SWEVENT_HLIST_BITS)
+#define SWEVENT_HLIST_BITS		8
+#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
 
 struct swevent_hlist {
-	struct hlist_head	heads[SWEVENT_HLIST_SIZE];
-	struct rcu_head		rcu_head;
+	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
+	struct rcu_head			rcu_head;
 };
 
 #define PERF_ATTACH_CONTEXT	0x01
@@ -733,13 +735,13 @@ struct swevent_hlist {
  * This is a per-cpu dynamically allocated data structure.
  */
 struct perf_cgroup_info {
-	u64 time;
-	u64 timestamp;
+	u64				time;
+	u64				timestamp;
 };
 
 struct perf_cgroup {
-	struct cgroup_subsys_state css;
-	struct perf_cgroup_info *info;	/* timing info, one per cpu */
+	struct				cgroup_subsys_state css;
+	struct				perf_cgroup_info *info;	/* timing info, one per cpu */
 };
 #endif
 
@@ -923,7 +925,7 @@ struct perf_event_context {
 
 /*
  * Number of contexts where an event can trigger:
- * 	task, softirq, hardirq, nmi.
+ *	task, softirq, hardirq, nmi.
  */
 #define PERF_NR_CONTEXTS	4
 
@@ -1001,8 +1003,7 @@ struct perf_sample_data {
 	struct perf_raw_record		*raw;
 };
 
-static inline
-void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
+static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
 {
 	data->addr = addr;
 	data->raw  = NULL;
@@ -1034,13 +1035,12 @@ static inline int is_software_event(struct perf_event *event)
 	return event->pmu->task_ctx_nr == perf_sw_context;
 }
 
-extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
 #ifndef perf_arch_fetch_caller_regs
-static inline void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
 #endif
 
 /*
@@ -1063,26 +1063,24 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
 	struct pt_regs hot_regs;
 
-	JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
-	return;
-
-have_event:
-	if (!regs) {
-		perf_fetch_caller_regs(&hot_regs);
-		regs = &hot_regs;
+	if (static_branch(&perf_swevent_enabled[event_id])) {
+		if (!regs) {
+			perf_fetch_caller_regs(&hot_regs);
+			regs = &hot_regs;
+		}
+		__perf_sw_event(event_id, nr, nmi, regs, addr);
 	}
-	__perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
-extern atomic_t perf_sched_events;
+extern struct jump_label_key perf_sched_events;
 
 static inline void perf_event_task_sched_in(struct task_struct *task)
 {
-	COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
+	if (static_branch(&perf_sched_events))
+		__perf_event_task_sched_in(task);
 }
 
-static inline
-void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
 {
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
 
@@ -1100,14 +1098,10 @@ extern void perf_event_fork(struct task_struct *tsk);
 /* Callchains */
 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
 
-extern void perf_callchain_user(struct perf_callchain_entry *entry,
-				struct pt_regs *regs);
-extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
-				  struct pt_regs *regs);
-
+extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
+extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
 
-static inline void
-perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
+static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
 	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
@@ -1143,9 +1137,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record,
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
-#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
-				 PERF_RECORD_MISC_KERNEL)
-#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+# define perf_misc_flags(regs) \
+		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
+# define perf_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
 
 extern int perf_output_begin(struct perf_output_handle *handle,
@@ -1180,9 +1174,9 @@ static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline int perf_register_guest_info_callbacks
-(struct perf_guest_info_callbacks *callbacks) { return 0; }
+(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 static inline int perf_unregister_guest_info_callbacks
-(struct perf_guest_info_callbacks *callbacks) { return 0; }
+(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
@@ -1195,23 +1189,22 @@ static inline void perf_event_disable(struct perf_event *event)		{ }
 static inline void perf_event_task_tick(void)				{ }
 #endif
 
-#define perf_output_put(handle, x) \
-	perf_output_copy((handle), &(x), sizeof(x))
+#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
 /*
  * This has to have a higher priority than migration_notifier in sched.c.
  */
-#define perf_cpu_notifier(fn)					\
-do {								\
-	static struct notifier_block fn##_nb __cpuinitdata =	\
-		{ .notifier_call = fn, .priority = CPU_PRI_PERF }; \
-	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,		\
-		(void *)(unsigned long)smp_processor_id());	\
-	fn(&fn##_nb, (unsigned long)CPU_STARTING,		\
-		(void *)(unsigned long)smp_processor_id());	\
-	fn(&fn##_nb, (unsigned long)CPU_ONLINE,			\
-		(void *)(unsigned long)smp_processor_id());	\
-	register_cpu_notifier(&fn##_nb);			\
+#define perf_cpu_notifier(fn)						\
+do {									\
+	static struct notifier_block fn##_nb __cpuinitdata =		\
+		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
+	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
+		(void *)(unsigned long)smp_processor_id());		\
+	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
+		(void *)(unsigned long)smp_processor_id());		\
+	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
+		(void *)(unsigned long)smp_processor_id());		\
+	register_cpu_notifier(&fn##_nb);				\
 } while (0)
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 31afb7ecbe1f..cdced84261d7 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr);
  */
 extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
-int next_pidmap(struct pid_namespace *pid_ns, int last);
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index d96db9825708..ede1a80e3358 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -14,6 +14,8 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 
+struct mfd_cell;
+
 struct platform_device {
 	const char	* name;
 	int		id;
@@ -23,6 +25,9 @@ struct platform_device {
 
 	const struct platform_device_id	*id_entry;
 
+	/* MFD cell pointer */
+	struct mfd_cell *mfd_cell;
+
 	/* arch specific additions */
 	struct pdev_archdata	archdata;
 };
@@ -145,9 +150,6 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr
 					struct resource *res, unsigned int n_res,
 					const void *data, size_t size);
 
-extern const struct dev_pm_ops * platform_bus_get_pm_ops(void);
-extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm);
-
 /* early platform driver interface */
 struct early_platform_driver {
 	const char *class_str;
@@ -200,4 +202,64 @@ static inline char *early_platform_driver_setup_func(void)		\
 }
 #endif /* MODULE */
 
+#ifdef CONFIG_PM_SLEEP
+extern int platform_pm_prepare(struct device *dev);
+extern void platform_pm_complete(struct device *dev);
+#else
+#define platform_pm_prepare	NULL
+#define platform_pm_complete	NULL
+#endif
+
+#ifdef CONFIG_SUSPEND
+extern int platform_pm_suspend(struct device *dev);
+extern int platform_pm_suspend_noirq(struct device *dev);
+extern int platform_pm_resume(struct device *dev);
+extern int platform_pm_resume_noirq(struct device *dev);
+#else
+#define platform_pm_suspend		NULL
+#define platform_pm_resume		NULL
+#define platform_pm_suspend_noirq	NULL
+#define platform_pm_resume_noirq	NULL
+#endif
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+extern int platform_pm_freeze(struct device *dev);
+extern int platform_pm_freeze_noirq(struct device *dev);
+extern int platform_pm_thaw(struct device *dev);
+extern int platform_pm_thaw_noirq(struct device *dev);
+extern int platform_pm_poweroff(struct device *dev);
+extern int platform_pm_poweroff_noirq(struct device *dev);
+extern int platform_pm_restore(struct device *dev);
+extern int platform_pm_restore_noirq(struct device *dev);
+#else
+#define platform_pm_freeze		NULL
+#define platform_pm_thaw		NULL
+#define platform_pm_poweroff		NULL
+#define platform_pm_restore		NULL
+#define platform_pm_freeze_noirq	NULL
+#define platform_pm_thaw_noirq		NULL
+#define platform_pm_poweroff_noirq	NULL
+#define platform_pm_restore_noirq	NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+#define USE_PLATFORM_PM_SLEEP_OPS \
+	.prepare = platform_pm_prepare, \
+	.complete = platform_pm_complete, \
+	.suspend = platform_pm_suspend, \
+	.resume = platform_pm_resume, \
+	.freeze = platform_pm_freeze, \
+	.thaw = platform_pm_thaw, \
+	.poweroff = platform_pm_poweroff, \
+	.restore = platform_pm_restore, \
+	.suspend_noirq = platform_pm_suspend_noirq, \
+	.resume_noirq = platform_pm_resume_noirq, \
+	.freeze_noirq = platform_pm_freeze_noirq, \
+	.thaw_noirq = platform_pm_thaw_noirq, \
+	.poweroff_noirq = platform_pm_poweroff_noirq, \
+	.restore_noirq = platform_pm_restore_noirq,
+#else
+#define USE_PLATFORM_PM_SLEEP_OPS
+#endif
+
 #endif /* _PLATFORM_DEVICE_H_ */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 512e09177e57..3160648ccdda 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -460,6 +460,7 @@ struct dev_pm_info {
 	unsigned long		active_jiffies;
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
+	void			*subsys_data;  /* Owned by the subsystem. */
 #endif
 };
 
@@ -529,21 +530,17 @@ struct dev_power_domain {
  */
 
 #ifdef CONFIG_PM_SLEEP
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-extern int sysdev_suspend(pm_message_t state);
-extern int sysdev_resume(void);
-#else
-static inline int sysdev_suspend(pm_message_t state) { return 0; }
-static inline int sysdev_resume(void) { return 0; }
-#endif
-
 extern void device_pm_lock(void);
 extern void dpm_resume_noirq(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
+extern void dpm_resume(pm_message_t state);
+extern void dpm_complete(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int dpm_suspend_noirq(pm_message_t state);
 extern int dpm_suspend_start(pm_message_t state);
+extern int dpm_suspend(pm_message_t state);
+extern int dpm_prepare(pm_message_t state);
 
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
@@ -553,6 +550,16 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 	} while (0)
 
 extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
+
+extern int pm_generic_prepare(struct device *dev);
+extern int pm_generic_suspend(struct device *dev);
+extern int pm_generic_resume(struct device *dev);
+extern int pm_generic_freeze(struct device *dev);
+extern int pm_generic_thaw(struct device *dev);
+extern int pm_generic_restore(struct device *dev);
+extern int pm_generic_poweroff(struct device *dev);
+extern void pm_generic_complete(struct device *dev);
+
 #else /* !CONFIG_PM_SLEEP */
 
 #define device_pm_lock() do {} while (0)
@@ -569,6 +576,15 @@ static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
 {
 	return 0;
 }
+
+#define pm_generic_prepare	NULL
+#define pm_generic_suspend	NULL
+#define pm_generic_resume	NULL
+#define pm_generic_freeze	NULL
+#define pm_generic_thaw		NULL
+#define pm_generic_restore	NULL
+#define pm_generic_poweroff	NULL
+#define pm_generic_complete	NULL
 #endif /* !CONFIG_PM_SLEEP */
 
 /* How to reorder dpm_list after device_move() */
@@ -579,11 +595,4 @@ enum dpm_order {
 	DPM_ORDER_DEV_LAST,
 };
 
-extern int pm_generic_suspend(struct device *dev);
-extern int pm_generic_resume(struct device *dev);
-extern int pm_generic_freeze(struct device *dev);
-extern int pm_generic_thaw(struct device *dev);
-extern int pm_generic_restore(struct device *dev);
-extern int pm_generic_poweroff(struct device *dev);
-
 #endif /* _LINUX_PM_H */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 8de9aa6e7def..878cf84baeb1 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -245,4 +245,46 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev)
 	__pm_runtime_use_autosuspend(dev, false);
 }
 
+struct pm_clk_notifier_block {
+	struct notifier_block nb;
+	struct dev_power_domain *pwr_domain;
+	char *con_ids[];
+};
+
+#ifdef CONFIG_PM_RUNTIME_CLK
+extern int pm_runtime_clk_init(struct device *dev);
+extern void pm_runtime_clk_destroy(struct device *dev);
+extern int pm_runtime_clk_add(struct device *dev, const char *con_id);
+extern void pm_runtime_clk_remove(struct device *dev, const char *con_id);
+extern int pm_runtime_clk_suspend(struct device *dev);
+extern int pm_runtime_clk_resume(struct device *dev);
+#else /* !CONFIG_PM_RUNTIME_CLK */
+static inline int pm_runtime_clk_init(struct device *dev)
+{
+	return -EINVAL;
+}
+static inline void pm_runtime_clk_destroy(struct device *dev)
+{
+}
+static inline int pm_runtime_clk_add(struct device *dev, const char *con_id)
+{
+	return -EINVAL;
+}
+static inline void pm_runtime_clk_remove(struct device *dev, const char *con_id)
+{
+}
+#define pm_runtime_clk_suspend	NULL	/* same names as the externs in the other branch */
+#define pm_runtime_clk_resume	NULL
+#endif /* CONFIG_PM_RUNTIME_CLK */
+
+#ifdef CONFIG_HAVE_CLK
+extern void pm_runtime_clk_add_notifier(struct bus_type *bus,
+					struct pm_clk_notifier_block *clknb);
+#else
+static inline void pm_runtime_clk_add_notifier(struct bus_type *bus,
+					struct pm_clk_notifier_block *clknb)
+{
+}
+#endif
+
 #endif
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 369e19d3750b..7f1183dcd119 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -24,6 +24,7 @@
 #include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/posix-timers.h>
+#include <linux/rwsem.h>
 
 struct posix_clock;
 
@@ -104,7 +105,7 @@ struct posix_clock_operations {
  * @ops:     Functional interface to the clock
  * @cdev:    Character device instance for this clock
  * @kref:    Reference count.
- * @mutex:   Protects the 'zombie' field from concurrent access.
+ * @rwsem:   Protects the 'zombie' field from concurrent access.
  * @zombie:  If 'zombie' is true, then the hardware has disappeared.
  * @release: A function to free the structure when the reference count reaches
  *           zero. May be NULL if structure is statically allocated.
@@ -117,7 +118,7 @@ struct posix_clock {
 	struct posix_clock_operations ops;
 	struct cdev cdev;
 	struct kref kref;
-	struct mutex mutex;
+	struct rw_semaphore rwsem;
 	bool zombie;
 	void (*release)(struct posix_clock *clk);
 };
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d51243ae0726..808227d40a64 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/sched.h>
 #include <linux/timex.h>
+#include <linux/alarmtimer.h>
 
 union cpu_time_count {
 	cputime_t cpu;
@@ -80,6 +81,7 @@ struct k_itimer {
 			unsigned long incr;
 			unsigned long expires;
 		} mmtimer;
+		struct alarm alarmtimer;
 	} it;
 };
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 838c1149251a..eaf4350c0f90 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name,
 		struct proc_dir_entry *parent,const char *dest) {return NULL;}
 static inline struct proc_dir_entry *proc_mkdir(const char *name,
 	struct proc_dir_entry *parent) {return NULL;}
+static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
+	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
 
 static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *base, 
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 41977737bb7d..2455ef2683f0 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,7 +35,9 @@ struct pstore_info {
 	struct mutex	buf_mutex;	/* serialize access to 'buf' */
 	char		*buf;
 	size_t		bufsize;
-	size_t		(*read)(u64 *id, enum pstore_type_id *type,
+	int		(*open)(struct pstore_info *psi);
+	int		(*close)(struct pstore_info *psi);
+	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
 			struct timespec *time);
 	u64		(*write)(enum pstore_type_id type, size_t size);
 	int		(*erase)(u64 id);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a1147e5dd245..9178d5cc0b01 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 		child->ptrace = current->ptrace;
 		__ptrace_link(child, current->parent);
 	}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	atomic_set(&child->ptrace_bp_refcnt, 1);
+#endif
 }
 
 /**
@@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
 
-#endif
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern int ptrace_get_breakpoints(struct task_struct *tsk);
+extern void ptrace_put_breakpoints(struct task_struct *tsk);
+#else
+static inline void ptrace_put_breakpoints(struct task_struct *tsk) { }
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+#endif /* __KERNEL__ */
 
 #endif
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 7066acb2c530..033b507b33b1 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -136,6 +136,14 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 #define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
 #define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
 
+static inline void rb_init_node(struct rb_node *rb)
+{
+	rb->rb_parent_color = 0;
+	rb->rb_right = NULL;
+	rb->rb_left = NULL;
+	RB_CLEAR_NODE(rb);
+}
+
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
 
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 2dea94fc4402..e3beb315517a 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -253,7 +253,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  */
 #define list_for_each_entry_rcu(pos, head, member) \
 	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
-		prefetch(pos->member.next), &pos->member != (head); \
+		&pos->member != (head); \
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
@@ -270,7 +270,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  */
 #define list_for_each_continue_rcu(pos, head) \
 	for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
-		prefetch((pos)->next), (pos) != (head); \
+		(pos) != (head); \
 		(pos) = rcu_dereference_raw(list_next_rcu(pos)))
 
 /**
@@ -284,7 +284,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  */
 #define list_for_each_entry_continue_rcu(pos, head, member) 		\
 	for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
-	     prefetch(pos->member.next), &pos->member != (head);	\
+	     &pos->member != (head);	\
 	     pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 /**
@@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 
 #define __hlist_for_each_rcu(pos, head)				\
 	for (pos = rcu_dereference(hlist_first_rcu(head));	\
-	     pos && ({ prefetch(pos->next); 1; });		\
+	     pos;						\
 	     pos = rcu_dereference(hlist_next_rcu(pos)))
 
 /**
@@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  */
 #define hlist_for_each_entry_rcu(tpos, pos, head, member)		\
 	for (pos = rcu_dereference_raw(hlist_first_rcu(head));		\
-		pos && ({ prefetch(pos->next); 1; }) &&			 \
+		pos &&							 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(hlist_next_rcu(pos)))
 
@@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  */
 #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member)		 \
 	for (pos = rcu_dereference_bh((head)->first);			 \
-		pos && ({ prefetch(pos->next); 1; }) &&			 \
+		pos &&							 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_bh(pos->next))
 
@@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  */
 #define hlist_for_each_entry_continue_rcu(tpos, pos, member)		\
 	for (pos = rcu_dereference((pos)->next);			\
-	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     pos &&							\
 	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
 	     pos = rcu_dereference(pos->next))
 
@@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  */
 #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member)		\
 	for (pos = rcu_dereference_bh((pos)->next);			\
-	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     pos &&							\
 	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
 	     pos = rcu_dereference_bh(pos->next))
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ff422d2b7f90..99f9aa7c2804 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,6 +47,18 @@
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
+extern void rcutorture_record_test_transition(void);
+extern void rcutorture_record_progress(unsigned long vernum);
+#else
+static inline void rcutorture_record_test_transition(void)
+{
+}
+static inline void rcutorture_record_progress(unsigned long vernum)
+{
+}
+#endif
+
 #define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
 #define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
@@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head,
 extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
-extern int sched_expedited_torture_stats(char *page);
 
 static inline void __rcu_read_lock_bh(void)
 {
@@ -774,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr;
 
 static inline void debug_rcu_head_queue(struct rcu_head *head)
 {
+	WARN_ON_ONCE((unsigned long)head & 0x3);
 	debug_object_activate(head, &rcuhead_debug_descr);
 	debug_object_active_state(head, &rcuhead_debug_descr,
 				  STATE_RCU_HEAD_READY,
@@ -797,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
+{
+	return offset < 4096;
+}
+
+static __always_inline
+void __kfree_rcu(struct rcu_head *head, unsigned long offset)
+{
+	typedef void (*rcu_callback)(struct rcu_head *);
+
+	BUILD_BUG_ON(!__builtin_constant_p(offset));
+
+	/* See the kfree_rcu() header comment. */
+	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
+
+	call_rcu(head, (rcu_callback)offset);
+}
+
+extern void kfree(const void *);
+
+static inline void __rcu_reclaim(struct rcu_head *head)
+{
+	unsigned long offset = (unsigned long)head->func;
+
+	if (__is_kfree_rcu_offset(offset))
+		kfree((void *)head - offset);
+	else
+		head->func(head);
+}
+
+/**
+ * kfree_rcu() - kfree an object after a grace period.
+ * @ptr:	pointer to kfree
+ * @rcu_head:	the name of the struct rcu_head within the type of @ptr.
+ *
+ * Many rcu callback functions just call kfree() on the base structure.
+ * These functions are trivial, but their size adds up, and furthermore
+ * when they are used in a kernel module, that module must invoke the
+ * high-latency rcu_barrier() function at module-unload time.
+ *
+ * The kfree_rcu() function handles this issue.  Rather than encoding a
+ * function address in the embedded rcu_head structure, kfree_rcu() instead
+ * encodes the offset of the rcu_head structure within the base structure.
+ * Because the functions are not allowed in the low-order 4096 bytes of
+ * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
+ * If the offset is larger than 4095 bytes, a compile-time error will
+ * be generated in __kfree_rcu().  If this error is triggered, you can
+ * either fall back to use of call_rcu() or rearrange the structure to
+ * position the rcu_head structure into the first 4096 bytes.
+ *
+ * Note that the allowable offset might decrease in the future, for example,
+ * to allow something like kmem_cache_free_rcu().
+ */
+#define kfree_rcu(ptr, rcu_head)					\
+	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 30ebd7c8d874..52b3e0281fd0 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -100,6 +100,14 @@ static inline void rcu_note_context_switch(int cpu)
 }
 
 /*
+ * Take advantage of the fact that there is only one CPU, which
+ * allows us to ignore virtualization-based context switches.
+ */
+static inline void rcu_virt_note_context_switch(int cpu)
+{
+}
+
+/*
  * Return the number of grace periods.
  */
 static inline long rcu_batches_completed(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3a933482734a..e65d06634dd8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern void rcu_cpu_stall_reset(void);
 
+/*
+ * Note a virtualization-based context switch.  This is simply a
+ * wrapper around rcu_note_context_switch(), which allows TINY_RCU
+ * to save a few bytes.
+ */
+static inline void rcu_virt_note_context_switch(int cpu)
+{
+	rcu_note_context_switch(cpu);
+}
+
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 extern void exit_rcu(void);
@@ -58,9 +68,12 @@ static inline void synchronize_rcu_bh_expedited(void)
 
 extern void rcu_barrier(void);
 
+extern unsigned long rcutorture_testseq;
+extern unsigned long rcutorture_vernum;
 extern long rcu_batches_completed(void);
 extern long rcu_batches_completed_bh(void);
 extern long rcu_batches_completed_sched(void);
+
 extern void rcu_force_quiescent_state(void);
 extern void rcu_bh_force_quiescent_state(void);
 extern void rcu_sched_force_quiescent_state(void);
diff --git a/include/linux/rio.h b/include/linux/rio.h
index 4e37a7cfa726..4d50611112ba 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -396,7 +396,7 @@ union rio_pw_msg {
 };
 
 /* Architecture and hardware-specific functions */
-extern void rio_register_mport(struct rio_mport *);
+extern int rio_register_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
 extern void rio_close_inb_mbox(struct rio_mport *, int);
 extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int);
diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h
index 7410d3365e2a..0cee0152aca9 100644
--- a/include/linux/rio_ids.h
+++ b/include/linux/rio_ids.h
@@ -35,6 +35,7 @@
 #define RIO_DID_IDTCPS6Q		0x035f
 #define RIO_DID_IDTCPS10Q		0x035e
 #define RIO_DID_IDTCPS1848		0x0374
+#define RIO_DID_IDTCPS1432		0x0375
 #define RIO_DID_IDTCPS1616		0x0379
 #define RIO_DID_IDTVPS1616		0x0377
 #define RIO_DID_IDTSPS1616		0x0378
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 2ca7e8a78060..877ece45426f 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -228,6 +228,8 @@ extern int rtc_read_alarm(struct rtc_device *rtc,
 			struct rtc_wkalrm *alrm);
 extern int rtc_set_alarm(struct rtc_device *rtc,
 				struct rtc_wkalrm *alrm);
+extern int rtc_initialize_alarm(struct rtc_device *rtc,
+				struct rtc_wkalrm *alrm);
 extern void rtc_update_irq(struct rtc_device *rtc,
 			unsigned long num, unsigned long events);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ec2c027e92c..885c4f242ad7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
-extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
+extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
 struct user_namespace;
@@ -653,9 +653,8 @@ struct signal_struct {
  * Bits in flags field of signal_struct.
  */
 #define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
-#define SIGNAL_STOP_DEQUEUED	0x00000002 /* stop signal dequeued */
-#define SIGNAL_STOP_CONTINUED	0x00000004 /* SIGCONT since WCONTINUED reap */
-#define SIGNAL_GROUP_EXIT	0x00000008 /* group exit in progress */
+#define SIGNAL_STOP_CONTINUED	0x00000002 /* SIGCONT since WCONTINUED reap */
+#define SIGNAL_GROUP_EXIT	0x00000004 /* group exit in progress */
 /*
  * Pending notifications to parent.
  */
@@ -731,10 +730,6 @@ struct sched_info {
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
-#ifdef CONFIG_SCHEDSTATS
-	/* BKL stats */
-	unsigned int bkl_count;
-#endif
 };
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
@@ -868,6 +863,7 @@ static inline int sd_power_saving_flags(void)
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -882,9 +878,6 @@ struct sched_group {
 	 * NOTE: this field is variable length. (Allocated dynamically
 	 * by attaching extra space to the end of the structure,
 	 * depending on how many CPUs the kernel has booted up with)
-	 *
-	 * It is also be embedded into static data structures at build
-	 * time. (See 'struct static_sched_group' in kernel/sched.c)
 	 */
 	unsigned long cpumask[0];
 };
@@ -894,17 +887,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
-enum sched_domain_level {
-	SD_LV_NONE = 0,
-	SD_LV_SIBLING,
-	SD_LV_MC,
-	SD_LV_BOOK,
-	SD_LV_CPU,
-	SD_LV_NODE,
-	SD_LV_ALLNODES,
-	SD_LV_MAX
-};
-
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -913,6 +895,8 @@ struct sched_domain_attr {
 	.relax_domain_level = -1,			\
 }
 
+extern int sched_domain_level_max;
+
 struct sched_domain {
 	/* These fields must be setup */
 	struct sched_domain *parent;	/* top domain must be null terminated */
@@ -930,7 +914,7 @@ struct sched_domain {
 	unsigned int forkexec_idx;
 	unsigned int smt_gain;
 	int flags;			/* See SD_* */
-	enum sched_domain_level level;
+	int level;
 
 	/* Runtime fields. */
 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
@@ -973,6 +957,10 @@ struct sched_domain {
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
 #endif
+	union {
+		void *private;		/* used during construction */
+		struct rcu_head rcu;	/* used during destruction */
+	};
 
 	unsigned int span_weight;
 	/*
@@ -981,9 +969,6 @@ struct sched_domain {
 	 * NOTE: this field is variable length. (Allocated dynamically
 	 * by attaching extra space to the end of the structure,
 	 * depending on how many CPUs the kernel has booted up with)
-	 *
-	 * It is also be embedded into static data structures at build
-	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
 	 */
 	unsigned long span[0];
 };
@@ -1048,8 +1033,12 @@ struct sched_domain;
 #define WF_FORK		0x02		/* child wakeup after fork */
 
 #define ENQUEUE_WAKEUP		1
-#define ENQUEUE_WAKING		2
-#define ENQUEUE_HEAD		4
+#define ENQUEUE_HEAD		2
+#ifdef CONFIG_SMP
+#define ENQUEUE_WAKING		4	/* sched_class::task_waking was called */
+#else
+#define ENQUEUE_WAKING		0
+#endif
 
 #define DEQUEUE_SLEEP		1
 
@@ -1067,12 +1056,11 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct rq *rq, struct task_struct *p,
-			       int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
-	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+	void (*task_waking) (struct task_struct *task);
 	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
@@ -1197,13 +1185,11 @@ struct task_struct {
 	unsigned int flags;	/* per process flags, defined below */
 	unsigned int ptrace;
 
-	int lock_depth;		/* BKL lock depth */
-
 #ifdef CONFIG_SMP
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
-	int oncpu;
-#endif
+	struct task_struct *wake_entry;
+	int on_cpu;
 #endif
+	int on_rq;
 
 	int prio, static_prio, normal_prio;
 	unsigned int rt_priority;
@@ -1254,6 +1240,9 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
+#ifdef CONFIG_COMPAT_BRK
+	unsigned brk_randomized:1;
+#endif
 #if defined(SPLIT_RSS_COUNTING)
 	struct task_rss_stat	rss_stat;
 #endif
@@ -1261,6 +1250,7 @@ struct task_struct {
 	int exit_state;
 	int exit_code, exit_signal;
 	int pdeath_signal;  /*  The signal sent when the parent dies  */
+	unsigned int group_stop;	/* GROUP_STOP_*, siglock protected */
 	/* ??? */
 	unsigned int personality;
 	unsigned did_exec:1;
@@ -1271,6 +1261,7 @@ struct task_struct {
 
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
+	unsigned sched_contributes_to_load:1;
 
 	pid_t pid;
 	pid_t tgid;
@@ -1534,6 +1525,9 @@ struct task_struct {
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
 #endif
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	atomic_t ptrace_bp_refcnt;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -1777,6 +1771,17 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
+/*
+ * task->group_stop flags
+ */
+#define GROUP_STOP_SIGMASK	0xffff    /* signr of the last group stop */
+#define GROUP_STOP_PENDING	(1 << 16) /* task should stop for group stop */
+#define GROUP_STOP_CONSUME	(1 << 17) /* consume group stop count */
+#define GROUP_STOP_TRAPPING	(1 << 18) /* switching from STOPPED to TRACED */
+#define GROUP_STOP_DEQUEUED	(1 << 19) /* stop signal dequeued */
+
+extern void task_clear_group_stop_pending(struct task_struct *task);
+
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
@@ -2057,14 +2062,13 @@ extern void xtime_update(unsigned long ticks);
 
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
-extern void wake_up_new_task(struct task_struct *tsk,
-				unsigned long clone_flags);
+extern void wake_up_new_task(struct task_struct *tsk);
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p, int clone_flags);
+extern void sched_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
@@ -2189,8 +2193,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void scheduler_ipi(void) { }
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
diff --git a/include/linux/security.h b/include/linux/security.h
index ca02f1716736..8ce59ef3e5af 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1456,7 +1456,7 @@ struct security_operations {
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
 	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
-	int (*inode_permission) (struct inode *inode, int mask);
+	int (*inode_permission) (struct inode *inode, int mask, unsigned flags);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
 	int (*inode_setxattr) (struct dentry *dentry, const char *name,
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index e98cd2e57194..06d69648fc86 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -88,12 +88,12 @@ static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
 	unsigned ret;
 
 repeat:
-	ret = sl->sequence;
-	smp_rmb();
+	ret = ACCESS_ONCE(sl->sequence);
 	if (unlikely(ret & 1)) {
 		cpu_relax();
 		goto repeat;
 	}
+	smp_rmb();
 
 	return ret;
 }
diff --git a/include/linux/signal.h b/include/linux/signal.h
index fcd2b14b1932..a822300a253b 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -7,6 +7,8 @@
 #ifdef __KERNEL__
 #include <linux/list.h>
 
+struct task_struct;
+
 /* for sysctl */
 extern int print_fatal_signals;
 /*
@@ -123,13 +125,13 @@ _SIG_SET_BINOP(sigorsets, _sig_or)
 #define _sig_and(x,y)	((x) & (y))
 _SIG_SET_BINOP(sigandsets, _sig_and)
 
-#define _sig_nand(x,y)	((x) & ~(y))
-_SIG_SET_BINOP(signandsets, _sig_nand)
+#define _sig_andn(x,y)	((x) & ~(y))
+_SIG_SET_BINOP(sigandnsets, _sig_andn)
 
 #undef _SIG_SET_BINOP
 #undef _sig_or
 #undef _sig_and
-#undef _sig_nand
+#undef _sig_andn
 
 #define _SIG_SET_OP(name, op)						\
 static inline void name(sigset_t *set)					\
@@ -234,6 +236,9 @@ static inline int valid_signal(unsigned long sig)
 	return sig <= _NSIG ? 1 : 0;
 }
 
+struct timespec;
+struct pt_regs;
+
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
@@ -242,10 +247,12 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
 				 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
+extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
+				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
+extern void set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void exit_signals(struct task_struct *tsk);
 
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index b14f6a91e19f..a26e2fb604e6 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -5,6 +5,8 @@
 # error "please don't include this file directly"
 #endif
 
+#include <asm/processor.h>	/* for cpu_relax() */
+
 /*
  * include/linux/spinlock_up.h - UP-debug version of spinlocks.
  *
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index f017b8900f78..252e44821787 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struct ssb_bus *bus);
 
 /* Set a fallback SPROM.
  * See kdoc at the function definition for complete documentation. */
-extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom);
+extern int ssb_arch_register_fallback_sprom(
+		int (*sprom_callback)(struct ssb_bus *bus,
+		struct ssb_sprom *out));
 
 /* Suspend a SSB bus.
  * Call this from the parent bus suspend routine. */
diff --git a/include/linux/string.h b/include/linux/string.h
index a716ee2a8adb..a176db2f2c85 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
 extern void argv_free(char **argv);
 
 extern bool sysfs_streq(const char *s1, const char *s2);
+extern int strtobool(const char *s, bool *res);
 
 #ifdef CONFIG_BINARY_PRINTF
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index d81db8012c63..f73c482ec9c6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -127,13 +127,16 @@ struct rpc_task_setup {
 #define RPC_TASK_KILLED		0x0100		/* task was killed */
 #define RPC_TASK_SOFT		0x0200		/* Use soft timeouts */
 #define RPC_TASK_SOFTCONN	0x0400		/* Fail if can't connect */
+#define RPC_TASK_SENT		0x0800		/* message was sent */
+#define RPC_TASK_TIMEOUT	0x1000		/* fail with ETIMEDOUT on timeout */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
-#define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)
+#define RPC_IS_SOFT(t)		((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
 #define RPC_IS_SOFTCONN(t)	((t)->tk_flags & RPC_TASK_SOFTCONN)
+#define RPC_WAS_SENT(t)		((t)->tk_flags & RPC_TASK_SENT)
 
 #define RPC_TASK_RUNNING	0
 #define RPC_TASK_QUEUED		1
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 5a89e3612875..083ffea7ba18 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -249,6 +249,8 @@ extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
 #else /* CONFIG_HIBERNATION */
+static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
 static inline void swsusp_unset_page_free(struct page *p) {}
@@ -297,14 +299,7 @@ static inline bool pm_wakeup_pending(void) { return false; }
 
 extern struct mutex pm_mutex;
 
-#ifndef CONFIG_HIBERNATION
-static inline void register_nosave_region(unsigned long b, unsigned long e)
-{
-}
-static inline void register_nosave_region_late(unsigned long b, unsigned long e)
-{
-}
-
+#ifndef CONFIG_HIBERNATE_CALLBACKS
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
 
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index dfb078db8ebb..d35e783a598c 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -34,12 +34,6 @@ struct sysdev_class {
 	struct list_head	drivers;
 	struct sysdev_class_attribute **attrs;
 	struct kset		kset;
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-	/* Default operations for these types of devices */
-	int	(*shutdown)(struct sys_device *);
-	int	(*suspend)(struct sys_device *, pm_message_t state);
-	int	(*resume)(struct sys_device *);
-#endif
 };
 
 struct sysdev_class_attribute {
@@ -77,11 +71,6 @@ struct sysdev_driver {
 	struct list_head	entry;
 	int	(*add)(struct sys_device *);
 	int	(*remove)(struct sys_device *);
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-	int	(*shutdown)(struct sys_device *);
-	int	(*suspend)(struct sys_device *, pm_message_t state);
-	int	(*resume)(struct sys_device *);
-#endif
 };
 
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 30b881555fa5..c3acda60eee0 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -176,7 +176,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 				      const unsigned char *name);
 struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
 void sysfs_put(struct sysfs_dirent *sd);
-void sysfs_printk_last_file(void);
 
 /* Called to clear a ns tag when it is no longer valid */
 void sysfs_exit_ns(enum kobj_ns_type type, const void *tag);
@@ -348,10 +347,6 @@ static inline int __must_check sysfs_init(void)
 	return 0;
 }
 
-static inline void sysfs_printk_last_file(void)
-{
-}
-
 #endif /* CONFIG_SYSFS */
 
 #endif /* _SYSFS_H_ */
diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index 7071ec5d0118..b004e557caa9 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -140,12 +140,12 @@ extern long st_unregister(struct st_proto_s *);
  */
 struct st_data_s {
 	unsigned long st_state;
-	struct tty_struct *tty;
 	struct sk_buff *tx_skb;
 #define ST_TX_SENDING	1
 #define ST_TX_WAKEUP	2
 	unsigned long tx_state;
 	struct st_proto_s *list[ST_MAX_CHANNELS];
+	bool is_registered[ST_MAX_CHANNELS];
 	unsigned long rx_state;
 	unsigned long rx_count;
 	struct sk_buff *rx_skb;
@@ -155,6 +155,7 @@ struct st_data_s {
 	unsigned char	protos_registered;
 	unsigned long ll_state;
 	void *kim_data;
+	struct tty_struct *tty;
 };
 
 /*
diff --git a/include/linux/time.h b/include/linux/time.h
index 454a26205787..b3061782dec3 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -126,6 +126,7 @@ struct timespec __current_kernel_time(void); /* does not take xtime_lock */
 struct timespec get_monotonic_coarse(void);
 void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
 				struct timespec *wtom, struct timespec *sleep);
+void timekeeping_inject_sleeptime(struct timespec *delta);
 
 #define CURRENT_TIME		(current_kernel_time())
 #define CURRENT_TIME_SEC	((struct timespec) { get_seconds(), 0 })
@@ -294,6 +295,8 @@ struct itimerval {
 #define CLOCK_REALTIME_COARSE		5
 #define CLOCK_MONOTONIC_COARSE		6
 #define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
 
 /*
  * The IDs of various hardware clocks:
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index a520fd70a59f..5088727478fd 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
 
 static inline void timerqueue_init(struct timerqueue_node *node)
 {
-	RB_CLEAR_NODE(&node->node);
+	rb_init_node(&node->node);
 }
 
 static inline void timerqueue_init_head(struct timerqueue_head *head)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index ebcfa4ebdbf8..e95f5236611f 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -469,33 +469,6 @@ static inline int tracehook_get_signal(struct task_struct *task,
 }
 
 /**
- * tracehook_notify_jctl - report about job control stop/continue
- * @notify:		zero, %CLD_STOPPED or %CLD_CONTINUED
- * @why:		%CLD_STOPPED or %CLD_CONTINUED
- *
- * This is called when we might call do_notify_parent_cldstop().
- *
- * @notify is zero if we would not ordinarily send a %SIGCHLD,
- * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
- *
- * @why is %CLD_STOPPED when about to stop for job control;
- * we are already in %TASK_STOPPED state, about to call schedule().
- * It might also be that we have just exited (check %PF_EXITING),
- * but need to report that a group-wide stop is complete.
- *
- * @why is %CLD_CONTINUED when waking up after job control stop and
- * ready to make a delayed @notify report.
- *
- * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
- *
- * Called with the siglock held.
- */
-static inline int tracehook_notify_jctl(int notify, int why)
-{
-	return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
-}
-
-/**
  * tracehook_finish_jctl - report about return from job control stop
  *
  * This is called by do_signal_stop() after wakeup.
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 97c84a58efb8..d530a4460a0b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -29,7 +29,7 @@ struct tracepoint_func {
 
 struct tracepoint {
 	const char *name;		/* Tracepoint name */
-	int state;			/* State. */
+	struct jump_label_key key;
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
 	struct tracepoint_func __rcu *funcs;
@@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin,
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
-		JUMP_LABEL(&__tracepoint_##name.state, do_trace);	\
-		return;							\
-do_trace:								\
+		if (static_branch(&__tracepoint_##name.key))		\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args),			\
@@ -176,14 +174,14 @@ do_trace:								\
  * structures, so we create an array of pointers that will be used for iteration
  * on the tracepoints.
  */
-#define DEFINE_TRACE_FN(name, reg, unreg)				\
-	static const char __tpstrtab_##name[]				\
-	__attribute__((section("__tracepoints_strings"))) = #name;	\
-	struct tracepoint __tracepoint_##name				\
-	__attribute__((section("__tracepoints"))) =			\
-		{ __tpstrtab_##name, 0, reg, unreg, NULL };		\
-	static struct tracepoint * const __tracepoint_ptr_##name __used	\
-	__attribute__((section("__tracepoints_ptrs"))) =		\
+#define DEFINE_TRACE_FN(name, reg, unreg)				 \
+	static const char __tpstrtab_##name[]				 \
+	__attribute__((section("__tracepoints_strings"))) = #name;	 \
+	struct tracepoint __tracepoint_##name				 \
+	__attribute__((section("__tracepoints"))) =			 \
+		{ __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\
+	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
 		&__tracepoint_##name;
 
 #define DEFINE_TRACE(name)						\
diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h
index 7054a7a8065e..de5c15921025 100644
--- a/include/linux/v4l2-mediabus.h
+++ b/include/linux/v4l2-mediabus.h
@@ -47,7 +47,7 @@ enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007,
 	V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008,
 
-	/* YUV (including grey) - next is 0x2013 */
+	/* YUV (including grey) - next is 0x2014 */
 	V4L2_MBUS_FMT_Y8_1X8 = 0x2001,
 	V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002,
 	V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003,
@@ -60,6 +60,7 @@ enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_Y10_1X10 = 0x200a,
 	V4L2_MBUS_FMT_YUYV10_2X10 = 0x200b,
 	V4L2_MBUS_FMT_YVYU10_2X10 = 0x200c,
+	V4L2_MBUS_FMT_Y12_1X12 = 0x2013,
 	V4L2_MBUS_FMT_UYVY8_1X16 = 0x200f,
 	V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010,
 	V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011,
@@ -67,9 +68,11 @@ enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d,
 	V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e,
 
-	/* Bayer - next is 0x3013 */
+	/* Bayer - next is 0x3015 */
 	V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001,
+	V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013,
 	V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002,
+	V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014,
 	V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b,
 	V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c,
 	V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009,
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index aa6c393b7ae9..be82c8ead1af 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -308,6 +308,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_Y4      v4l2_fourcc('Y', '0', '4', ' ') /*  4  Greyscale     */
 #define V4L2_PIX_FMT_Y6      v4l2_fourcc('Y', '0', '6', ' ') /*  6  Greyscale     */
 #define V4L2_PIX_FMT_Y10     v4l2_fourcc('Y', '1', '0', ' ') /* 10  Greyscale     */
+#define V4L2_PIX_FMT_Y12     v4l2_fourcc('Y', '1', '2', ' ') /* 12  Greyscale     */
 #define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y', '1', '6', ' ') /* 16  Greyscale     */
 
 /* Palette formats */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 461c0119664f..2b3831b58aa4 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -58,6 +58,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		UNEVICTABLE_PGCLEARED,	/* on COW, page truncate */
 		UNEVICTABLE_PGSTRANDED,	/* unable to isolate on unlock */
 		UNEVICTABLE_MLOCKFREED,
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		THP_FAULT_ALLOC,
+		THP_FAULT_FALLBACK,
+		THP_COLLAPSE_ALLOC,
+		THP_COLLAPSE_ALLOC_FAILED,
+		THP_SPLIT,
+#endif
 		NR_VM_EVENT_ITEMS
 };
 
diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index bd102cf509ac..d61febfb1668 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -163,7 +163,7 @@ v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev);
 ({									\
 	struct v4l2_subdev *__sd;					\
 	__v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, cond, o,	\
-						f, args...);		\
+						f , ##args);		\
 })
 
 /* Call the specified callback for all subdevs matching grp_id (if 0, then
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index cdf2e8ac4309..d2df55b0c213 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -139,8 +139,6 @@ do { \
  */
 
 enum p9_msg_t {
-	P9_TSYNCFS = 0,
-	P9_RSYNCFS,
 	P9_TLERROR = 6,
 	P9_RLERROR,
 	P9_TSTATFS = 8,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 85c1413f054d..051a99f79769 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -218,8 +218,8 @@ void p9_client_disconnect(struct p9_client *clnt);
 void p9_client_begin_disconnect(struct p9_client *clnt);
 struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
 					char *uname, u32 n_uname, char *aname);
-struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
-								int clone);
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+		char **wnames, int clone);
 int p9_client_open(struct p9_fid *fid, int mode);
 int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
 							char *extension);
@@ -230,7 +230,6 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
 		gid_t gid, struct p9_qid *qid);
 int p9_client_clunk(struct p9_fid *fid);
 int p9_client_fsync(struct p9_fid *fid, int datasync);
-int p9_client_sync_fs(struct p9_fid *fid);
 int p9_client_remove(struct p9_fid *fid);
 int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
 							u64 offset, u32 count);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 7e8e34c29270..b2c2366676a7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -115,7 +115,6 @@
  * sctp/protocol.c
  */
 extern struct sock *sctp_get_ctl_sock(void);
-extern void sctp_local_addr_free(struct rcu_head *head);
 extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
 				     sctp_scope_t, gfp_t gfp,
 				     int flags);
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index cbb822e8d791..2d0191c90f9e 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -46,18 +46,9 @@ enum iw_cm_event_type {
 	IW_CM_EVENT_CLOSE		 /* close complete */
 };
 
-enum iw_cm_event_status {
-	IW_CM_EVENT_STATUS_OK = 0,	 /* request successful */
-	IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
-	IW_CM_EVENT_STATUS_REJECTED,	 /* connect request rejected */
-	IW_CM_EVENT_STATUS_TIMEOUT,	 /* the operation timed out */
-	IW_CM_EVENT_STATUS_RESET,	 /* reset from remote peer */
-	IW_CM_EVENT_STATUS_EINVAL,	 /* asynchronous failure for bad parm */
-};
-
 struct iw_cm_event {
 	enum iw_cm_event_type event;
-	enum iw_cm_event_status status;
+	int			 status;
 	struct sockaddr_in local_addr;
 	struct sockaddr_in remote_addr;
 	void *private_data;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 4fae90304648..169f7a53fb0c 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -329,4 +329,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
  */
 void rdma_set_service_type(struct rdma_cm_id *id, int tos);
 
+/**
+ * rdma_set_reuseaddr - Allow the reuse of local addresses when binding
+ *    the rdma_cm_id.
+ * @id: Communication identifier to configure.
+ * @reuse: Value indicating if the bound address is reusable.
+ *
+ * Reuse must be set before an address is bound to the id.
+ */
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
+
 #endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index 1d165022c02d..fc82c1896f75 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -221,8 +221,9 @@ enum {
 
 /* Option details */
 enum {
-	RDMA_OPTION_ID_TOS	= 0,
-	RDMA_OPTION_IB_PATH	= 1
+	RDMA_OPTION_ID_TOS	 = 0,
+	RDMA_OPTION_ID_REUSEADDR = 1,
+	RDMA_OPTION_IB_PATH	 = 1
 };
 
 struct rdma_ucm_set_option {
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 2d3ec5094685..dd82e02ddde3 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -169,6 +169,7 @@ struct scsi_device {
 				sdev_dev;
 
 	struct execute_work	ew; /* used to get process context on put */
+	struct work_struct	requeue_work;
 
 	struct scsi_dh_data	*scsi_dh_data;
 	enum scsi_device_state sdev_state;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 78f18adb49c8..bf366547da25 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -401,9 +401,9 @@ TRACE_EVENT(block_plug,
 
 DECLARE_EVENT_CLASS(block_unplug,
 
-	TP_PROTO(struct request_queue *q),
+	TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
 
-	TP_ARGS(q),
+	TP_ARGS(q, depth, explicit),
 
 	TP_STRUCT__entry(
 		__field( int,		nr_rq			)
@@ -411,7 +411,7 @@ DECLARE_EVENT_CLASS(block_unplug,
 	),
 
 	TP_fast_assign(
-		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		__entry->nr_rq = depth;
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -419,31 +419,19 @@ DECLARE_EVENT_CLASS(block_unplug,
 );
 
 /**
- * block_unplug_timer - timed release of operations requests in queue to device driver
- * @q: request queue to unplug
- *
- * Unplug the request queue @q because a timer expired and allow block
- * operation requests to be sent to the device driver.
- */
-DEFINE_EVENT(block_unplug, block_unplug_timer,
-
-	TP_PROTO(struct request_queue *q),
-
-	TP_ARGS(q)
-);
-
-/**
- * block_unplug_io - release of operations requests in request queue
+ * block_unplug - release of operations requests in request queue
  * @q: request queue to unplug
+ * @depth: number of requests just added to the queue
+ * @explicit: whether this was an explicit unplug, or one from schedule()
  *
  * Unplug request queue @q because device driver is scheduled to work
  * on elements in the request queue.
  */
-DEFINE_EVENT(block_unplug, block_unplug_io,
+DEFINE_EVENT(block_unplug, block_unplug,
 
-	TP_PROTO(struct request_queue *q),
+	TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
 
-	TP_ARGS(q)
+	TP_ARGS(q, depth, explicit)
 );
 
 /**
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index e3615c093741..9fe3a36646e9 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -10,6 +10,7 @@
  */
 #define show_gfp_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
+	{(unsigned long)GFP_TRANSHUGE,		"GFP_TRANSHUGE"},	\
 	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"}, \
 	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
 	{(unsigned long)GFP_USER,		"GFP_USER"},		\
@@ -32,6 +33,9 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
-	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
+	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
+	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
+	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
+	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
 	) : "GFP_NOWAIT"
 
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 1c09820df585..ae045ca7d356 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -20,8 +20,7 @@ struct softirq_action;
 			 softirq_name(BLOCK_IOPOLL),	\
 			 softirq_name(TASKLET),		\
 			 softirq_name(SCHED),		\
-			 softirq_name(HRTIMER),		\
-			 softirq_name(RCU))
+			 softirq_name(HRTIMER))
 
 /**
  * irq_handler_entry - called immediately before the irq action handler
diff --git a/include/xen/events.h b/include/xen/events.h
index f1b87ad48ac7..9af21e19545a 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
 /* Bind an PSI pirq to an irq. */
 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			     int pirq, int vector, const char *name);
+			     int pirq, int vector, const char *name,
+			     domid_t domid);
 #endif
 
 /* De-allocates the above mentioned physical interrupt. */
@@ -94,4 +95,10 @@ int xen_destroy_irq(int irq);
 /* Return irq from pirq */
 int xen_irq_from_pirq(unsigned pirq);
 
+/* Return the pirq allocated to the irq. */
+int xen_pirq_from_irq(unsigned irq);
+
+/* Determine whether to ignore this IRQ if it is passed to a guest. */
+int xen_test_irq_shared(int irq);
+
 #endif	/* _XEN_EVENTS_H */
diff --git a/init/Kconfig b/init/Kconfig
index 56240e724d9a..4986ecc49e65 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -485,7 +485,7 @@ config TREE_RCU_TRACE
 
 config RCU_BOOST
 	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && TINY_PREEMPT_RCU
+	depends on RT_MUTEXES && PREEMPT_RCU
 	default n
 	help
 	  This option boosts the priority of preempted RCU readers that
@@ -827,6 +827,11 @@ config SCHED_AUTOGROUP
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_TTWU_QUEUE
+	bool
+	depends on !SPARC32
+	default y
+
 config MM_OWNER
 	bool
 
@@ -924,14 +929,6 @@ menuconfig EXPERT
           environments which can tolerate a "non-standard" kernel.
           Only use this if you really know what you are doing.
 
-config EMBEDDED
-	bool "Embedded system"
-	select EXPERT
-	help
-	  This option should be enabled if compiling the kernel for
-	  an embedded system so certain expert options are available
-	  for configuration.
-
 config UID16
 	bool "Enable 16-bit UID system calls" if EXPERT
 	depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION)
@@ -1104,6 +1101,14 @@ config AIO
           by some high performance threaded applications. Disabling
           this option saves about 7k.
 
+config EMBEDDED
+	bool "Embedded system"
+	select EXPERT
+	help
+	  This option should be enabled if compiling the kernel for
+	  an embedded system so certain expert options are available
+	  for configuration.
+
 config HAVE_PERF_EVENTS
 	bool
 	help
diff --git a/init/main.c b/init/main.c
index 4a9479ef4540..48df882d51d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -580,8 +580,8 @@ asmlinkage void __init start_kernel(void)
 #endif
 	page_cgroup_init();
 	enable_debug_pagealloc();
-	kmemleak_init();
 	debug_objects_mem_init();
+	kmemleak_init();
 	setup_per_cpu_pageset();
 	numa_policy_init();
 	if (late_time_init)
diff --git a/kernel/Makefile b/kernel/Makefile
index 85cbfb31e73e..e9cf19155b46 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,7 +21,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_perf_event.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 
@@ -103,8 +102,9 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o
-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+
+obj-$(CONFIG_PERF_EVENTS) += events/
+
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/kernel/capability.c b/kernel/capability.c
index bf0c734d0c12..32a80e08ff4b 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap)
 	return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
 }
 EXPORT_SYMBOL(task_ns_capable);
+
+/**
+ * nsown_capable - Check superior capability to one's own user_ns
+ * @cap: The capability in question
+ *
+ * Return true if the current task has the given superior capability
+ * targeted at its own user namespace.
+ */
+bool nsown_capable(int cap)
+{
+	return ns_capable(current_user_ns(), cap);
+}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1a..909a35510af5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
-static void free_css_set_rcu(struct rcu_head *obj)
-{
-	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
-	kfree(cg);
-}
-
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
@@ -375,7 +369,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 	}
 
 	write_unlock(&css_set_lock);
-	call_rcu(&cg->rcu_head, free_css_set_rcu);
+	kfree_rcu(cg, rcu_head);
 }
 
 /*
@@ -812,13 +806,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 	return ret;
 }
 
-static void free_cgroup_rcu(struct rcu_head *obj)
-{
-	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
-
-	kfree(cgrp);
-}
-
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -856,7 +843,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 */
 		BUG_ON(!list_empty(&cgrp->pidlists));
 
-		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
+		kfree_rcu(cgrp, rcu_head);
 	}
 	iput(inode);
 }
@@ -4623,14 +4610,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
 	return ret;
 }
 
-static void __free_css_id_cb(struct rcu_head *head)
-{
-	struct css_id *id;
-
-	id = container_of(head, struct css_id, rcu_head);
-	kfree(id);
-}
-
 void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 {
 	struct css_id *id = css->id;
@@ -4645,7 +4624,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 	spin_lock(&ss->id_lock);
 	idr_remove(&ss->idr, id->id);
 	spin_unlock(&ss->id_lock);
-	call_rcu(&id->rcu_head, __free_css_id_cb);
+	kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
 
diff --git a/kernel/compat.c b/kernel/compat.c
index 38b1d2c1cbe8..9214dcd087b7 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -890,10 +890,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 {
 	compat_sigset_t s32;
 	sigset_t s;
-	int sig;
 	struct timespec t;
 	siginfo_t info;
-	long ret, timeout = 0;
+	long ret;
 
 	if (sigsetsize != sizeof(sigset_t))
 		return -EINVAL;
@@ -901,51 +900,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 	if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
 		return -EFAULT;
 	sigset_from_compat(&s, &s32);
-	sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
-	signotset(&s);
 
 	if (uts) {
-		if (get_compat_timespec (&t, uts))
+		if (get_compat_timespec(&t, uts))
 			return -EFAULT;
-		if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
-				|| t.tv_sec < 0)
-			return -EINVAL;
 	}
 
-	spin_lock_irq(&current->sighand->siglock);
-	sig = dequeue_signal(current, &s, &info);
-	if (!sig) {
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (uts)
-			timeout = timespec_to_jiffies(&t)
-				+(t.tv_sec || t.tv_nsec);
-		if (timeout) {
-			current->real_blocked = current->blocked;
-			sigandsets(&current->blocked, &current->blocked, &s);
-
-			recalc_sigpending();
-			spin_unlock_irq(&current->sighand->siglock);
-
-			timeout = schedule_timeout_interruptible(timeout);
-
-			spin_lock_irq(&current->sighand->siglock);
-			sig = dequeue_signal(current, &s, &info);
-			current->blocked = current->real_blocked;
-			siginitset(&current->real_blocked, 0);
-			recalc_sigpending();
-		}
-	}
-	spin_unlock_irq(&current->sighand->siglock);
+	ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
 
-	if (sig) {
-		ret = sig;
-		if (uinfo) {
-			if (copy_siginfo_to_user32(uinfo, &info))
-				ret = -EFAULT;
-		}
-	}else {
-		ret = timeout?-EINTR:-EAGAIN;
+	if (ret > 0 && uinfo) {
+		if (copy_siginfo_to_user32(uinfo, &info))
+			ret = -EFAULT;
 	}
+
 	return ret;
 
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 33eee16addb8..2bb8c2e98fff 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1159,7 +1159,7 @@ int current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-	if (val < -1 || val >= SD_LV_MAX)
+	if (val < -1 || val >= sched_domain_level_max)
 		return -EINVAL;
 #endif
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 5557b55048df..8093c16b84b1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -54,6 +54,7 @@ struct cred init_cred = {
 	.cap_effective		= CAP_INIT_EFF_SET,
 	.cap_bset		= CAP_INIT_BSET,
 	.user			= INIT_USER,
+	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
 #ifdef CONFIG_KEYS
 	.tgcred			= &init_tgcred,
@@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 			goto error_put;
 	}
 
+	/* cache user_ns in cred.  Doesn't need a refcount because it will
+	 * stay pinned by cred->user
+	 */
+	new->user_ns = new->user->user_ns;
+
 #ifdef CONFIG_KEYS
 	/* new threads get their own thread keyrings if their parent already
 	 * had one */
@@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode)
 }
 EXPORT_SYMBOL(set_create_files_as);
 
-struct user_namespace *current_user_ns(void)
-{
-	return _current_user_ns();
-}
-EXPORT_SYMBOL(current_user_ns);
-
 #ifdef CONFIG_DEBUG_CREDENTIALS
 
 bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
new file mode 100644
index 000000000000..1ce23d3d8394
--- /dev/null
+++ b/kernel/events/Makefile
@@ -0,0 +1,6 @@
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_core.o = -pg
+endif
+
+obj-y := core.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/perf_event.c b/kernel/events/core.c
index 27960f114efd..c09767f7db3e 100644
--- a/kernel/perf_event.c
+++ b/kernel/events/core.c
@@ -2,8 +2,8 @@
  * Performance events core code:
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
@@ -39,10 +39,10 @@
 #include <asm/irq_regs.h>
 
 struct remote_function_call {
-	struct task_struct *p;
-	int (*func)(void *info);
-	void *info;
-	int ret;
+	struct task_struct	*p;
+	int			(*func)(void *info);
+	void			*info;
+	int			ret;
 };
 
 static void remote_function(void *data)
@@ -76,10 +76,10 @@ static int
 task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
 {
 	struct remote_function_call data = {
-		.p = p,
-		.func = func,
-		.info = info,
-		.ret = -ESRCH, /* No such (running) process */
+		.p	= p,
+		.func	= func,
+		.info	= info,
+		.ret	= -ESRCH, /* No such (running) process */
 	};
 
 	if (task_curr(p))
@@ -100,10 +100,10 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
 static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 {
 	struct remote_function_call data = {
-		.p = NULL,
-		.func = func,
-		.info = info,
-		.ret = -ENXIO, /* No such CPU */
+		.p	= NULL,
+		.func	= func,
+		.info	= info,
+		.ret	= -ENXIO, /* No such CPU */
 	};
 
 	smp_call_function_single(cpu, remote_function, &data, 1);
@@ -125,7 +125,7 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-atomic_t perf_sched_events __read_mostly;
+struct jump_label_key perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -364,6 +364,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 			}
 
 			if (mode & PERF_CGROUP_SWIN) {
+				WARN_ON_ONCE(cpuctx->cgrp);
 				/* set cgrp before ctxsw in to
 				 * allow event_filter_match() to not
 				 * have to pass task around
@@ -585,14 +586,6 @@ static void get_ctx(struct perf_event_context *ctx)
 	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
-static void free_ctx(struct rcu_head *head)
-{
-	struct perf_event_context *ctx;
-
-	ctx = container_of(head, struct perf_event_context, rcu_head);
-	kfree(ctx);
-}
-
 static void put_ctx(struct perf_event_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
@@ -600,7 +593,7 @@ static void put_ctx(struct perf_event_context *ctx)
 			put_ctx(ctx->parent_ctx);
 		if (ctx->task)
 			put_task_struct(ctx->task);
-		call_rcu(&ctx->rcu_head, free_ctx);
+		kfree_rcu(ctx, rcu_head);
 	}
 }
 
@@ -2423,6 +2416,14 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 	if (!ctx || !ctx->nr_events)
 		goto out;
 
+	/*
+	 * We must ctxsw out cgroup events to avoid conflict
+	 * when invoking perf_task_event_sched_in() later on
+	 * in this function. Otherwise we end up trying to
+	 * ctxswin cgroup events which are already scheduled
+	 * in.
+	 */
+	perf_cgroup_sched_out(current);
 	task_ctx_sched_out(ctx, EVENT_ALL);
 
 	raw_spin_lock(&ctx->lock);
@@ -2447,6 +2448,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 
 	raw_spin_unlock(&ctx->lock);
 
+	/*
+	 * Also calls ctxswin for cgroup events, if any:
+	 */
 	perf_event_context_sched_in(ctx, ctx->task);
 out:
 	local_irq_restore(flags);
@@ -5319,14 +5323,6 @@ swevent_hlist_deref(struct swevent_htable *swhash)
 					 lockdep_is_held(&swhash->hlist_mutex));
 }
 
-static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
-{
-	struct swevent_hlist *hlist;
-
-	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
-	kfree(hlist);
-}
-
 static void swevent_hlist_release(struct swevent_htable *swhash)
 {
 	struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
@@ -5335,7 +5331,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
 		return;
 
 	rcu_assign_pointer(swhash->swevent_hlist, NULL);
-	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+	kfree_rcu(hlist, rcu_head);
 }
 
 static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
@@ -5417,7 +5413,7 @@ fail:
 	return err;
 }
 
-atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
 {
@@ -7433,11 +7429,11 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
 }
 
 struct cgroup_subsys perf_subsys = {
-	.name = "perf_event",
-	.subsys_id = perf_subsys_id,
-	.create = perf_cgroup_create,
-	.destroy = perf_cgroup_destroy,
-	.exit = perf_cgroup_exit,
-	.attach = perf_cgroup_attach,
+	.name		= "perf_event",
+	.subsys_id	= perf_subsys_id,
+	.create		= perf_cgroup_create,
+	.destroy	= perf_cgroup_destroy,
+	.exit		= perf_cgroup_exit,
+	.attach		= perf_cgroup_attach,
 };
 #endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 086adf25a55e..086adf25a55e 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
diff --git a/kernel/exit.c b/kernel/exit.c
index f5d2f63bae0b..20a406471525 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1016,7 +1016,7 @@ NORET_TYPE void do_exit(long code)
 	/*
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
-	flush_ptrace_hw_breakpoint(tsk);
+	ptrace_put_breakpoints(tsk);
 
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
@@ -1377,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
 	return NULL;
 }
 
-/*
- * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
- * the lock and this task is uninteresting.  If we return nonzero, we have
- * released the lock and the system call should return.
+/**
+ * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
+ * @wo: wait options
+ * @ptrace: is the wait for ptrace
+ * @p: task to wait for
+ *
+ * Handle sys_wait4() work for @p in state %TASK_STOPPED or %TASK_TRACED.
+ *
+ * CONTEXT:
+ * read_lock(&tasklist_lock), which is released if return value is
+ * non-zero.  Also, grabs and releases @p->sighand->siglock.
+ *
+ * RETURNS:
+ * 0 if wait condition didn't exist and search for other wait conditions
+ * should continue.  Non-zero return, -errno on failure and @p's pid on
+ * success, implies that tasklist_lock is released and wait condition
+ * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
 				int ptrace, struct task_struct *p)
@@ -1397,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo,
 	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 
+	if (!task_stopped_code(p, ptrace))
+		return 0;
+
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
@@ -1538,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 		return 0;
 	}
 
-	if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+	/* dead body doesn't have much to contribute */
+	if (p->exit_state == EXIT_DEAD)
+		return 0;
+
+	/* slay zombie? */
+	if (p->exit_state == EXIT_ZOMBIE) {
+		/*
+		 * A zombie ptracee is only visible to its ptracer.
+		 * Notification and reaping will be cascaded to the real
+		 * parent when the ptracer detaches.
+		 */
+		if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+			/* it will become visible, clear notask_error */
+			wo->notask_error = 0;
+			return 0;
+		}
+
+		/* we don't reap group leaders with subthreads */
+		if (!delay_group_leader(p))
+			return wait_task_zombie(wo, p);
+
 		/*
-		 * This child is hidden by ptrace.
-		 * We aren't allowed to see it now, but eventually we will.
+		 * Allow access to stopped/continued state via zombie by
+		 * falling through.  Clearing of notask_error is complex.
+		 *
+		 * When !@ptrace:
+		 *
+		 * If WEXITED is set, notask_error should naturally be
+		 * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
+		 * so, if there are live subthreads, there are events to
+		 * wait for.  If all subthreads are dead, it's still safe
+		 * to clear - this function will be called again in finite
+		 * amount of time once all the subthreads are released and
+		 * will then return without clearing.
+		 *
+		 * When @ptrace:
+		 *
+		 * Stopped state is per-task and thus can't change once the
+		 * target task dies.  Only continued and exited can happen.
+		 * Clear notask_error if WCONTINUED | WEXITED.
+		 */
+		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
+			wo->notask_error = 0;
+	} else {
+		/*
+		 * If @p is ptraced by a task in its real parent's group,
+		 * hide group stop/continued state when looking at @p as
+		 * the real parent; otherwise, a single stop can be
+		 * reported twice as group and ptrace stops.
+		 *
+		 * If a ptracer wants to distinguish the two events for its
+		 * own children, it should create a separate process which
+		 * takes the role of real parent.
+		 */
+		if (likely(!ptrace) && task_ptrace(p) &&
+		    same_thread_group(p->parent, p->real_parent))
+			return 0;
+
+		/*
+		 * @p is alive and it's gonna stop, continue or exit, so
+		 * there always is something to wait for.
 		 */
 		wo->notask_error = 0;
-		return 0;
 	}
 
-	if (p->exit_state == EXIT_DEAD)
-		return 0;
-
 	/*
-	 * We don't reap group leaders with subthreads.
+	 * Wait for stopped.  Depending on @ptrace, different stopped state
+	 * is used and the two don't interact with each other.
 	 */
-	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-		return wait_task_zombie(wo, p);
+	ret = wait_task_stopped(wo, ptrace, p);
+	if (ret)
+		return ret;
 
 	/*
-	 * It's stopped or running now, so it might
-	 * later continue, exit, or stop again.
+	 * Wait for continued.  There's only one continued state and the
+	 * ptracer can consume it which can confuse the real parent.  Don't
+	 * use WCONTINUED from ptracer.  You don't need or want it.
 	 */
-	wo->notask_error = 0;
-
-	if (task_stopped_code(p, ptrace))
-		return wait_task_stopped(wo, ptrace, p);
-
 	return wait_task_continued(wo, p);
 }
 
diff --git a/kernel/extable.c b/kernel/extable.c
index 7f8f263f8524..5339705b8241 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -72,6 +72,24 @@ int core_kernel_text(unsigned long addr)
 	return 0;
 }
 
+/**
+ * core_kernel_data - tell if addr points to kernel data
+ * @addr: address to test
+ *
+ * Returns true if @addr passed in is from the core kernel data
+ * section.
+ *
+ * Note: On some archs it may return true for core RODATA, and false
+ *  for others. But will always be true for core RW data.
+ */
+int core_kernel_data(unsigned long addr)
+{
+	if (addr >= (unsigned long)_sdata &&
+	    addr < (unsigned long)_edata)
+		return 1;
+	return 0;
+}
+
 int __kernel_text_address(unsigned long addr)
 {
 	if (core_kernel_text(addr))
diff --git a/kernel/fork.c b/kernel/fork.c
index e7548dee636b..2b44d82b8237 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1103,7 +1103,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	posix_cpu_timers_init(p);
 
-	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
@@ -1153,7 +1152,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
-	sched_fork(p, clone_flags);
+	sched_fork(p);
 
 	retval = perf_event_init_task(p);
 	if (retval)
@@ -1464,7 +1463,7 @@ long do_fork(unsigned long clone_flags,
 		 */
 		p->flags &= ~PF_STARTING;
 
-		wake_up_new_task(p, clone_flags);
+		wake_up_new_task(p);
 
 		tracehook_report_clone_complete(trace, regs,
 						clone_flags, nr, p);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 66ecd2ead215..7b01de98bb6a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -17,7 +17,7 @@ static inline void frozen_process(void)
 {
 	if (!unlikely(current->flags & PF_NOFREEZE)) {
 		current->flags |= PF_FROZEN;
-		wmb();
+		smp_wmb();
 	}
 	clear_freeze_flag(current);
 }
@@ -93,7 +93,7 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 	 * the task as frozen and next clears its TIF_FREEZE.
 	 */
 	if (!freezing(p)) {
-		rmb();
+		smp_rmb();
 		if (frozen(p))
 			return false;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index dfb924ffe65b..fe28dc282eae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1886,7 +1886,7 @@ retry:
 	restart->futex.val = val;
 	restart->futex.time = abs_time->tv64;
 	restart->futex.bitset = bitset;
-	restart->futex.flags = flags;
+	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
 	ret = -ERESTART_RESTARTBLOCK;
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9017478c5d4c..dbbbf7d43080 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -81,7 +81,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 	}
 };
 
-static int hrtimer_clock_to_base_table[MAX_CLOCKS];
+static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
+	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
+	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
+};
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 {
@@ -1722,10 +1726,6 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
 
 void __init hrtimers_init(void)
 {
-	hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
-	hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
-	hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
-
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 53ead174da2f..ea640120ab86 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -33,7 +33,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
 /*
  * Zero means infinite timeout - no checking done:
  */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
 
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
 
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index c574f9a12c48..d1d051b38e0b 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -48,6 +48,10 @@ config IRQ_PREFLOW_FASTEOI
 config IRQ_EDGE_EOI_HANDLER
        bool
 
+# Generic configurable interrupt chip implementation
+config GENERIC_IRQ_CHIP
+       bool
+
 # Support forced irq threading
 config IRQ_FORCED_THREADING
        bool
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 54329cd7b3ee..73290056cfb6 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,6 @@
 
 obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4af1e2b244cb..d5a3009da71a 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -310,6 +310,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 out_unlock:
 	raw_spin_unlock(&desc->lock);
 }
+EXPORT_SYMBOL_GPL(handle_simple_irq);
 
 /**
  *	handle_level_irq - Level type irq handler
@@ -573,6 +574,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 	if (handle != handle_bad_irq && is_chained) {
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
+		irq_settings_set_nothread(desc);
 		irq_startup(desc);
 	}
 out:
@@ -612,6 +614,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
 
 	irq_put_desc_unlock(desc, flags);
 }
+EXPORT_SYMBOL_GPL(irq_modify_status);
 
 /**
  *	irq_cpu_online - Invoke all irq_cpu_online functions.
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index 306cba37e9a5..97a8bfadc88a 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -27,6 +27,7 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
 	P(IRQ_PER_CPU);
 	P(IRQ_NOPROBE);
 	P(IRQ_NOREQUEST);
+	P(IRQ_NOTHREAD);
 	P(IRQ_NOAUTOEN);
 
 	PS(IRQS_AUTODETECT);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
new file mode 100644
index 000000000000..31a9db711906
--- /dev/null
+++ b/kernel/irq/generic-chip.c
@@ -0,0 +1,354 @@
+/*
+ * Library implementing the most common irq chip callback functions
+ *
+ * Copyright (C) 2011, Thomas Gleixner
+ */
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/syscore_ops.h>
+
+#include "internals.h"
+
+static LIST_HEAD(gc_list);
+static DEFINE_RAW_SPINLOCK(gc_lock);
+
+static inline struct irq_chip_regs *cur_regs(struct irq_data *d)
+{
+	return &container_of(d->chip, struct irq_chip_type, chip)->regs;
+}
+
+/**
+ * irq_gc_noop - NOOP function
+ * @d: irq_data
+ */
+void irq_gc_noop(struct irq_data *d)
+{
+}
+
+/**
+ * irq_gc_mask_disable_reg - Mask chip via disable register
+ * @d: irq_data
+ *
+ * Chip has separate enable/disable registers instead of a single mask
+ * register.
+ */
+void irq_gc_mask_disable_reg(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->disable);
+	gc->mask_cache &= ~mask;
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_mask_set_bit - Mask chip via setting bit in mask register
+ * @d: irq_data
+ *
+ * Chip has a single mask register. Values of this register are cached
+ * and protected by gc->lock
+ */
+void irq_gc_mask_set_bit(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	gc->mask_cache |= mask;
+	irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask);
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_mask_clr_bit - Mask chip via clearing bit in mask register
+ * @d: irq_data
+ *
+ * Chip has a single mask register. Values of this register are cached
+ * and protected by gc->lock
+ */
+void irq_gc_mask_clr_bit(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	gc->mask_cache &= ~mask;
+	irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask);
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_unmask_enable_reg - Unmask chip via enable register
+ * @d: irq_data
+ *
+ * Chip has separate enable/disable registers instead of a single mask
+ * register.
+ */
+void irq_gc_unmask_enable_reg(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->enable);
+	gc->mask_cache |= mask;
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_ack - Ack pending interrupt
+ * @d: irq_data
+ */
+void irq_gc_ack(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack);
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_mask_disable_reg_and_ack - Mask (via regs.mask) and ack pending irq
+ * @d: irq_data
+ */
+void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->mask);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack);
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_eoi - EOI interrupt
+ * @d: irq_data
+ */
+void irq_gc_eoi(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->eoi);
+	irq_gc_unlock(gc);
+}
+
+/**
+ * irq_gc_set_wake - Set/clr wake bit for an interrupt
+ * @d: irq_data
+ *
+ * For chips where the wake from suspend functionality is not
+ * configured in a separate register and the wakeup active state is
+ * just stored in a bitmask.
+ */
+int irq_gc_set_wake(struct irq_data *d, unsigned int on)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = 1 << (d->irq - gc->irq_base);
+
+	if (!(mask & gc->wake_enabled))
+		return -EINVAL;
+
+	irq_gc_lock(gc);
+	if (on)
+		gc->wake_active |= mask;
+	else
+		gc->wake_active &= ~mask;
+	irq_gc_unlock(gc);
+	return 0;
+}
+
+/**
+ * irq_alloc_generic_chip - Allocate a generic chip and initialize it
+ * @name:	Name of the irq chip
+ * @num_ct:	Number of irq_chip_type instances associated with this
+ * @irq_base:	Interrupt base nr for this chip
+ * @reg_base:	Register base address (virtual)
+ * @handler:	Default flow handler associated with this chip
+ *
+ * Returns an initialized irq_chip_generic structure. The chip defaults
+ * to the primary (index 0) irq_chip_type and @handler
+ */
+struct irq_chip_generic *
+irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base,
+		       void __iomem *reg_base, irq_flow_handler_t handler)
+{
+	struct irq_chip_generic *gc;
+	unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+
+	gc = kzalloc(sz, GFP_KERNEL);
+	if (gc) {
+		raw_spin_lock_init(&gc->lock);
+		gc->num_ct = num_ct;
+		gc->irq_base = irq_base;
+		gc->reg_base = reg_base;
+		gc->chip_types->chip.name = name;
+		gc->chip_types->handler = handler;
+	}
+	return gc;
+}
+
+/*
+ * Separate lockdep class for interrupt chip which can nest irq_desc
+ * lock.
+ */
+static struct lock_class_key irq_nested_lock_class;
+
+/**
+ * irq_setup_generic_chip - Setup a range of interrupts with a generic chip
+ * @gc:		Generic irq chip holding all data
+ * @msk:	Bitmask holding the irqs to initialize relative to gc->irq_base
+ * @flags:	Flags for initialization
+ * @clr:	IRQ_* bits to clear
+ * @set:	IRQ_* bits to set
+ *
+ * Set up max. 32 interrupts starting from gc->irq_base. Note, this
+ * initializes all interrupts to the primary irq_chip_type and its
+ * associated handler.
+ */
+void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
+			    enum irq_gc_flags flags, unsigned int clr,
+			    unsigned int set)
+{
+	struct irq_chip_type *ct = gc->chip_types;
+	unsigned int i;
+
+	raw_spin_lock(&gc_lock);
+	list_add_tail(&gc->list, &gc_list);
+	raw_spin_unlock(&gc_lock);
+
+	/* Init mask cache ? */
+	if (flags & IRQ_GC_INIT_MASK_CACHE)
+		gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
+
+	for (i = gc->irq_base; msk; msk >>= 1, i++) {
+		if (!(msk & 0x01))
+			continue;	/* irq not selected in @msk */
+
+		if (flags & IRQ_GC_INIT_NESTED_LOCK)
+			irq_set_lockdep_class(i, &irq_nested_lock_class);
+
+		irq_set_chip_and_handler(i, &ct->chip, ct->handler);
+		irq_set_chip_data(i, gc);
+		irq_modify_status(i, clr, set);
+	}
+	gc->irq_cnt = i - gc->irq_base;
+}
+
+/**
+ * irq_setup_alt_chip - Switch to alternative chip
+ * @d:		irq_data for this interrupt
+ * @type:	Flow type to be initialized
+ *
+ * Only to be called from chip->irq_set_type() callbacks.
+ */
+int irq_setup_alt_chip(struct irq_data *d, unsigned int type)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct irq_chip_type *ct = gc->chip_types;
+	unsigned int i;
+
+	for (i = 0; i < gc->num_ct; i++, ct++) {
+		if (ct->type & type) {
+			d->chip = &ct->chip;
+			irq_data_to_desc(d)->handle_irq = ct->handler;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+/**
+ * irq_remove_generic_chip - Remove a chip
+ * @gc:		Generic irq chip holding all data
+ * @msk:	Bitmask holding the irqs to remove relative to gc->irq_base
+ * @clr:	IRQ_* bits to clear
+ * @set:	IRQ_* bits to set
+ *
+ * Remove up to 32 interrupts starting from gc->irq_base.
+ */
+void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
+			     unsigned int clr, unsigned int set)
+{
+	unsigned int i = gc->irq_base;
+
+	raw_spin_lock(&gc_lock);
+	list_del(&gc->list);
+	raw_spin_unlock(&gc_lock);
+
+	for (; msk; msk >>= 1, i++) {
+		if (!(msk & 0x01))
+			continue;	/* irq not selected in @msk */
+
+		/* Remove handler first. That will mask the irq line */
+		irq_set_handler(i, NULL);
+		irq_set_chip(i, &no_irq_chip);
+		irq_set_chip_data(i, NULL);
+		irq_modify_status(i, clr, set);
+	}
+}
+
+#ifdef CONFIG_PM
+static int irq_gc_suspend(void)
+{
+	struct irq_chip_generic *gc;
+
+	list_for_each_entry(gc, &gc_list, list) {
+		struct irq_chip_type *ct = gc->chip_types;
+
+		if (ct->chip.irq_suspend)
+			ct->chip.irq_suspend(irq_get_irq_data(gc->irq_base));
+	}
+	return 0;
+}
+
+static void irq_gc_resume(void)
+{
+	struct irq_chip_generic *gc;
+
+	list_for_each_entry(gc, &gc_list, list) {
+		struct irq_chip_type *ct = gc->chip_types;
+
+		if (ct->chip.irq_resume)
+			ct->chip.irq_resume(irq_get_irq_data(gc->irq_base));
+	}
+}
+#else
+#define irq_gc_suspend NULL
+#define irq_gc_resume NULL
+#endif
+
+static void irq_gc_shutdown(void)
+{
+	struct irq_chip_generic *gc;
+
+	list_for_each_entry(gc, &gc_list, list) {
+		struct irq_chip_type *ct = gc->chip_types;
+
+		if (ct->chip.irq_pm_shutdown)
+			ct->chip.irq_pm_shutdown(irq_get_irq_data(gc->irq_base));
+	}
+}
+
+static struct syscore_ops irq_gc_syscore_ops = {
+	.suspend = irq_gc_suspend,
+	.resume = irq_gc_resume,
+	.shutdown = irq_gc_shutdown,
+};
+
+static int __init irq_gc_init_ops(void)
+{
+	register_syscore_ops(&irq_gc_syscore_ops);
+	return 0;
+}
+device_initcall(irq_gc_init_ops);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2c039c9b9383..886e80347b32 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -22,7 +22,7 @@
  */
 static struct lock_class_key irq_desc_lock_class;
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+#if defined(CONFIG_SMP)
 static void __init init_irq_default_affinity(void)
 {
 	alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
@@ -290,6 +290,22 @@ static int irq_expand_nr_irqs(unsigned int nr)
 
 #endif /* !CONFIG_SPARSE_IRQ */
 
+/**
+ * generic_handle_irq - Invoke the handler for a particular irq
+ * @irq:	The irq number to handle
+ *
+ */
+int generic_handle_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (!desc)
+		return -EINVAL;
+	generic_handle_irq_desc(irq, desc);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq);
+
 /* Dynamic interrupt handling */
 
 /**
@@ -311,6 +327,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
 	bitmap_clear(allocated_irqs, from, cnt);
 	mutex_unlock(&sparse_irq_lock);
 }
+EXPORT_SYMBOL_GPL(irq_free_descs);
 
 /**
  * irq_alloc_descs - allocate and initialize a range of irq descriptors
@@ -351,6 +368,7 @@ err:
 	mutex_unlock(&sparse_irq_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(irq_alloc_descs);
 
 /**
  * irq_reserve_irqs - mark irqs allocated
@@ -430,7 +448,6 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 			*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
 unsigned int kstat_irqs(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -443,4 +460,3 @@ unsigned int kstat_irqs(unsigned int irq)
 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 	return sum;
 }
-#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 07c1611f3899..f7ce0021e1c4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -900,7 +900,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 */
 		new->handler = irq_nested_primary_handler;
 	} else {
-		irq_setup_forced_threading(new);
+		if (irq_settings_can_thread(desc))
+			irq_setup_forced_threading(new);
 	}
 
 	/*
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index dd201bd35103..834899f2500f 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -419,7 +419,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	} else {
 		seq_printf(p, " %8s", "None");
 	}
-#ifdef CONFIG_GENIRC_IRQ_SHOW_LEVEL
+#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
 	seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
 #endif
 	if (desc->name)
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 0d91730b6330..f1667833d444 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -8,6 +8,7 @@ enum {
 	_IRQ_LEVEL		= IRQ_LEVEL,
 	_IRQ_NOPROBE		= IRQ_NOPROBE,
 	_IRQ_NOREQUEST		= IRQ_NOREQUEST,
+	_IRQ_NOTHREAD		= IRQ_NOTHREAD,
 	_IRQ_NOAUTOEN		= IRQ_NOAUTOEN,
 	_IRQ_MOVE_PCNTXT	= IRQ_MOVE_PCNTXT,
 	_IRQ_NO_BALANCING	= IRQ_NO_BALANCING,
@@ -20,6 +21,7 @@ enum {
 #define IRQ_LEVEL		GOT_YOU_MORON
 #define IRQ_NOPROBE		GOT_YOU_MORON
 #define IRQ_NOREQUEST		GOT_YOU_MORON
+#define IRQ_NOTHREAD		GOT_YOU_MORON
 #define IRQ_NOAUTOEN		GOT_YOU_MORON
 #define IRQ_NESTED_THREAD	GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
@@ -94,6 +96,21 @@ static inline void irq_settings_set_norequest(struct irq_desc *desc)
 	desc->status_use_accessors |= _IRQ_NOREQUEST;
 }
 
+static inline bool irq_settings_can_thread(struct irq_desc *desc)
+{
+	return !(desc->status_use_accessors & _IRQ_NOTHREAD);
+}
+
+static inline void irq_settings_clr_nothread(struct irq_desc *desc)
+{
+	desc->status_use_accessors &= ~_IRQ_NOTHREAD;
+}
+
+static inline void irq_settings_set_nothread(struct irq_desc *desc)
+{
+	desc->status_use_accessors |= _IRQ_NOTHREAD;
+}
+
 static inline bool irq_settings_can_probe(struct irq_desc *desc)
 {
 	return !(desc->status_use_accessors & _IRQ_NOPROBE);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 3b79bd938330..74d1c099fbd1 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,43 +2,23 @@
  * jump label support
  *
  * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com>
  *
  */
-#include <linux/jump_label.h>
 #include <linux/memory.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/list.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/err.h>
+#include <linux/jump_label.h>
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_LABEL_HASH_BITS 6
-#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS)
-static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE];
-
 /* mutex to protect coming/going of the the jump_label table */
 static DEFINE_MUTEX(jump_label_mutex);
 
-struct jump_label_entry {
-	struct hlist_node hlist;
-	struct jump_entry *table;
-	int nr_entries;
-	/* hang modules off here */
-	struct hlist_head modules;
-	unsigned long key;
-};
-
-struct jump_label_module_entry {
-	struct hlist_node hlist;
-	struct jump_entry *table;
-	int nr_entries;
-	struct module *mod;
-};
-
 void jump_label_lock(void)
 {
 	mutex_lock(&jump_label_mutex);
@@ -49,6 +29,11 @@ void jump_label_unlock(void)
 	mutex_unlock(&jump_label_mutex);
 }
 
+bool jump_label_enabled(struct jump_label_key *key)
+{
+	return !!atomic_read(&key->enabled);
+}
+
 static int jump_label_cmp(const void *a, const void *b)
 {
 	const struct jump_entry *jea = a;
@@ -64,7 +49,7 @@ static int jump_label_cmp(const void *a, const void *b)
 }
 
 static void
-sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
+jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 {
 	unsigned long size;
 
@@ -73,118 +58,25 @@ sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
 	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
 }
 
-static struct jump_label_entry *get_jump_label_entry(jump_label_t key)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct jump_label_entry *e;
-	u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
-
-	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (key == e->key)
-			return e;
-	}
-	return NULL;
-}
+static void jump_label_update(struct jump_label_key *key, int enable);
 
-static struct jump_label_entry *
-add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table)
+void jump_label_inc(struct jump_label_key *key)
 {
-	struct hlist_head *head;
-	struct jump_label_entry *e;
-	u32 hash;
-
-	e = get_jump_label_entry(key);
-	if (e)
-		return ERR_PTR(-EEXIST);
-
-	e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL);
-	if (!e)
-		return ERR_PTR(-ENOMEM);
-
-	hash = jhash((void *)&key, sizeof(jump_label_t), 0);
-	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
-	e->key = key;
-	e->table = table;
-	e->nr_entries = nr_entries;
-	INIT_HLIST_HEAD(&(e->modules));
-	hlist_add_head(&e->hlist, head);
-	return e;
-}
+	if (atomic_inc_not_zero(&key->enabled))
+		return;
 
-static int
-build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop)
-{
-	struct jump_entry *iter, *iter_begin;
-	struct jump_label_entry *entry;
-	int count;
-
-	sort_jump_label_entries(start, stop);
-	iter = start;
-	while (iter < stop) {
-		entry = get_jump_label_entry(iter->key);
-		if (!entry) {
-			iter_begin = iter;
-			count = 0;
-			while ((iter < stop) &&
-				(iter->key == iter_begin->key)) {
-				iter++;
-				count++;
-			}
-			entry = add_jump_label_entry(iter_begin->key,
-							count, iter_begin);
-			if (IS_ERR(entry))
-				return PTR_ERR(entry);
-		 } else {
-			WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n");
-			return -1;
-		}
-	}
-	return 0;
+	jump_label_lock();
+	if (atomic_add_return(1, &key->enabled) == 1)
+		jump_label_update(key, JUMP_LABEL_ENABLE);
+	jump_label_unlock();
 }
 
-/***
- * jump_label_update - update jump label text
- * @key -  key value associated with a a jump label
- * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE
- *
- * Will enable/disable the jump for jump label @key, depending on the
- * value of @type.
- *
- */
-
-void jump_label_update(unsigned long key, enum jump_label_type type)
+void jump_label_dec(struct jump_label_key *key)
 {
-	struct jump_entry *iter;
-	struct jump_label_entry *entry;
-	struct hlist_node *module_node;
-	struct jump_label_module_entry *e_module;
-	int count;
+	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
+		return;
 
-	jump_label_lock();
-	entry = get_jump_label_entry((jump_label_t)key);
-	if (entry) {
-		count = entry->nr_entries;
-		iter = entry->table;
-		while (count--) {
-			if (kernel_text_address(iter->code))
-				arch_jump_label_transform(iter, type);
-			iter++;
-		}
-		/* eanble/disable jump labels in modules */
-		hlist_for_each_entry(e_module, module_node, &(entry->modules),
-							hlist) {
-			count = e_module->nr_entries;
-			iter = e_module->table;
-			while (count--) {
-				if (iter->key &&
-						kernel_text_address(iter->code))
-					arch_jump_label_transform(iter, type);
-				iter++;
-			}
-		}
-	}
+	jump_label_update(key, JUMP_LABEL_DISABLE);
 	jump_label_unlock();
 }
 
@@ -197,77 +89,33 @@ static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 	return 0;
 }
 
-#ifdef CONFIG_MODULES
-
-static int module_conflict(void *start, void *end)
+static int __jump_label_text_reserved(struct jump_entry *iter_start,
+		struct jump_entry *iter_stop, void *start, void *end)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
 	struct jump_entry *iter;
-	int i, count;
-	int conflict = 0;
-
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-							module_node_next,
-							&(e->modules), hlist) {
-				count = e_module->nr_entries;
-				iter = e_module->table;
-				while (count--) {
-					if (addr_conflict(iter, start, end)) {
-						conflict = 1;
-						goto out;
-					}
-					iter++;
-				}
-			}
-		}
-	}
-out:
-	return conflict;
-}
-
-#endif
-
-/***
- * jump_label_text_reserved - check if addr range is reserved
- * @start: start text addr
- * @end: end text addr
- *
- * checks if the text addr located between @start and @end
- * overlaps with any of the jump label patch addresses. Code
- * that wants to modify kernel text should first verify that
- * it does not overlap with any of the jump label addresses.
- * Caller must hold jump_label_mutex.
- *
- * returns 1 if there is an overlap, 0 otherwise
- */
-int jump_label_text_reserved(void *start, void *end)
-{
-	struct jump_entry *iter;
-	struct jump_entry *iter_start = __start___jump_table;
-	struct jump_entry *iter_stop = __start___jump_table;
-	int conflict = 0;
 
 	iter = iter_start;
 	while (iter < iter_stop) {
-		if (addr_conflict(iter, start, end)) {
-			conflict = 1;
-			goto out;
-		}
+		if (addr_conflict(iter, start, end))
+			return 1;
 		iter++;
 	}
 
-	/* now check modules */
-#ifdef CONFIG_MODULES
-	conflict = module_conflict(start, end);
-#endif
-out:
-	return conflict;
+	return 0;
+}
+
+static void __jump_label_update(struct jump_label_key *key,
+		struct jump_entry *entry, int enable)
+{
+	for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
+		/*
+		 * entry->code set to 0 invalidates module init text sections
+		 * kernel_text_address() verifies we are not in core kernel
+		 * init code, see jump_label_invalidate_module_init().
+		 */
+		if (entry->code && kernel_text_address(entry->code))
+			arch_jump_label_transform(entry, enable);
+	}
 }
 
 /*
@@ -277,142 +125,173 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
 {
 }
 
-static __init int init_jump_label(void)
+static __init int jump_label_init(void)
 {
-	int ret;
 	struct jump_entry *iter_start = __start___jump_table;
 	struct jump_entry *iter_stop = __stop___jump_table;
+	struct jump_label_key *key = NULL;
 	struct jump_entry *iter;
 
 	jump_label_lock();
-	ret = build_jump_label_hashtable(__start___jump_table,
-					 __stop___jump_table);
-	iter = iter_start;
-	while (iter < iter_stop) {
+	jump_label_sort_entries(iter_start, iter_stop);
+
+	for (iter = iter_start; iter < iter_stop; iter++) {
 		arch_jump_label_text_poke_early(iter->code);
-		iter++;
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+		atomic_set(&key->enabled, 0);
+		key->entries = iter;
+#ifdef CONFIG_MODULES
+		key->next = NULL;
+#endif
 	}
 	jump_label_unlock();
-	return ret;
+
+	return 0;
 }
-early_initcall(init_jump_label);
+early_initcall(jump_label_init);
 
 #ifdef CONFIG_MODULES
 
-static struct jump_label_module_entry *
-add_jump_label_module_entry(struct jump_label_entry *entry,
-			    struct jump_entry *iter_begin,
-			    int count, struct module *mod)
+struct jump_label_mod {
+	struct jump_label_mod *next;
+	struct jump_entry *entries;
+	struct module *mod;
+};
+
+static int __jump_label_mod_text_reserved(void *start, void *end)
+{
+	struct module *mod;
+
+	mod = __module_text_address((unsigned long)start);
+	if (!mod)
+		return 0;
+
+	WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
+
+	return __jump_label_text_reserved(mod->jump_entries,
+				mod->jump_entries + mod->num_jump_entries,
+				start, end);
+}
+
+static void __jump_label_mod_update(struct jump_label_key *key, int enable)
+{
+	struct jump_label_mod *mod = key->next;
+
+	while (mod) {
+		__jump_label_update(key, mod->entries, enable);
+		mod = mod->next;
+	}
+}
+
+/***
+ * jump_label_apply_nops - patch module jump labels with arch_get_jump_label_nop()
+ * @mod: module to patch
+ *
+ * Allow for run-time selection of the optimal nops. Before the module
+ * loads patch these with arch_get_jump_label_nop(), which is specified by
+ * the arch specific jump label code.
+ */
+void jump_label_apply_nops(struct module *mod)
 {
-	struct jump_label_module_entry *e;
-
-	e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL);
-	if (!e)
-		return ERR_PTR(-ENOMEM);
-	e->mod = mod;
-	e->nr_entries = count;
-	e->table = iter_begin;
-	hlist_add_head(&e->hlist, &entry->modules);
-	return e;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (iter_start == iter_stop)
+		return;
+
+	for (iter = iter_start; iter < iter_stop; iter++)
+		arch_jump_label_text_poke_early(iter->code);
 }
 
-static int add_jump_label_module(struct module *mod)
+static int jump_label_add_module(struct module *mod)
 {
-	struct jump_entry *iter, *iter_begin;
-	struct jump_label_entry *entry;
-	struct jump_label_module_entry *module_entry;
-	int count;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+	struct jump_label_key *key = NULL;
+	struct jump_label_mod *jlm;
 
 	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
+	if (iter_start == iter_stop)
 		return 0;
 
-	sort_jump_label_entries(mod->jump_entries,
-				mod->jump_entries + mod->num_jump_entries);
-	iter = mod->jump_entries;
-	while (iter < mod->jump_entries + mod->num_jump_entries) {
-		entry = get_jump_label_entry(iter->key);
-		iter_begin = iter;
-		count = 0;
-		while ((iter < mod->jump_entries + mod->num_jump_entries) &&
-			(iter->key == iter_begin->key)) {
-				iter++;
-				count++;
-		}
-		if (!entry) {
-			entry = add_jump_label_entry(iter_begin->key, 0, NULL);
-			if (IS_ERR(entry))
-				return PTR_ERR(entry);
+	jump_label_sort_entries(iter_start, iter_stop);
+
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+
+		if (__module_address(iter->key) == mod) {
+			atomic_set(&key->enabled, 0);
+			key->entries = iter;
+			key->next = NULL;
+			continue;
 		}
-		module_entry = add_jump_label_module_entry(entry, iter_begin,
-							   count, mod);
-		if (IS_ERR(module_entry))
-			return PTR_ERR(module_entry);
+
+		jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL);
+		if (!jlm)
+			return -ENOMEM;
+
+		jlm->mod = mod;
+		jlm->entries = iter;
+		jlm->next = key->next;
+		key->next = jlm;
+
+		if (jump_label_enabled(key))
+			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
 	}
+
 	return 0;
 }
 
-static void remove_jump_label_module(struct module *mod)
+static void jump_label_del_module(struct module *mod)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
-	int i;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+	struct jump_label_key *key = NULL;
+	struct jump_label_mod *jlm, **prev;
 
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+
+		if (__module_address(iter->key) == mod)
+			continue;
+
+		prev = &key->next;
+		jlm = key->next;
 
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-						  module_node_next,
-						  &(e->modules), hlist) {
-				if (e_module->mod == mod) {
-					hlist_del(&e_module->hlist);
-					kfree(e_module);
-				}
-			}
-			if (hlist_empty(&e->modules) && (e->nr_entries == 0)) {
-				hlist_del(&e->hlist);
-				kfree(e);
-			}
+		while (jlm && jlm->mod != mod) {
+			prev = &jlm->next;
+			jlm = jlm->next;
+		}
+
+		if (jlm) {
+			*prev = jlm->next;
+			kfree(jlm);
 		}
 	}
 }
 
-static void remove_jump_label_module_init(struct module *mod)
+static void jump_label_invalidate_module_init(struct module *mod)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
 	struct jump_entry *iter;
-	int i, count;
-
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
 
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-						  module_node_next,
-						  &(e->modules), hlist) {
-				if (e_module->mod != mod)
-					continue;
-				count = e_module->nr_entries;
-				iter = e_module->table;
-				while (count--) {
-					if (within_module_init(iter->code, mod))
-						iter->key = 0;
-					iter++;
-				}
-			}
-		}
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (within_module_init(iter->code, mod))
+			iter->code = 0;
 	}
 }
 
@@ -426,59 +305,77 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 	switch (val) {
 	case MODULE_STATE_COMING:
 		jump_label_lock();
-		ret = add_jump_label_module(mod);
+		ret = jump_label_add_module(mod);
 		if (ret)
-			remove_jump_label_module(mod);
+			jump_label_del_module(mod);
 		jump_label_unlock();
 		break;
 	case MODULE_STATE_GOING:
 		jump_label_lock();
-		remove_jump_label_module(mod);
+		jump_label_del_module(mod);
 		jump_label_unlock();
 		break;
 	case MODULE_STATE_LIVE:
 		jump_label_lock();
-		remove_jump_label_module_init(mod);
+		jump_label_invalidate_module_init(mod);
 		jump_label_unlock();
 		break;
 	}
-	return ret;
-}
 
-/***
- * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
- * @mod: module to patch
- *
- * Allow for run-time selection of the optimal nops. Before the module
- * loads patch these with arch_get_jump_label_nop(), which is specified by
- * the arch specific jump label code.
- */
-void jump_label_apply_nops(struct module *mod)
-{
-	struct jump_entry *iter;
-
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
-
-	iter = mod->jump_entries;
-	while (iter < mod->jump_entries + mod->num_jump_entries) {
-		arch_jump_label_text_poke_early(iter->code);
-		iter++;
-	}
+	return notifier_from_errno(ret);
 }
 
 struct notifier_block jump_label_module_nb = {
 	.notifier_call = jump_label_module_notify,
-	.priority = 0,
+	.priority = 1, /* higher than tracepoints */
 };
 
-static __init int init_jump_label_module(void)
+static __init int jump_label_init_module(void)
 {
 	return register_module_notifier(&jump_label_module_nb);
 }
-early_initcall(init_jump_label_module);
+early_initcall(jump_label_init_module);
 
 #endif /* CONFIG_MODULES */
 
+/***
+ * jump_label_text_reserved - check if addr range is reserved
+ * @start: start text addr
+ * @end: end text addr
+ *
+ * checks if the text addr located between @start and @end
+ * overlaps with any of the jump label patch addresses. Code
+ * that wants to modify kernel text should first verify that
+ * it does not overlap with any of the jump label addresses.
+ * Caller must hold jump_label_mutex.
+ *
+ * returns 1 if there is an overlap, 0 otherwise
+ */
+int jump_label_text_reserved(void *start, void *end)
+{
+	int ret = __jump_label_text_reserved(__start___jump_table,
+			__stop___jump_table, start, end);
+
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MODULES
+	ret = __jump_label_mod_text_reserved(start, end);
+#endif
+	return ret;
+}
+
+static void jump_label_update(struct jump_label_key *key, int enable)
+{
+	struct jump_entry *entry = key->entries;
+
+	/* if there are no users, entry can be NULL */
+	if (entry)
+		__jump_label_update(key, entry, enable);
+
+#ifdef CONFIG_MODULES
+	__jump_label_mod_update(key, enable);
+#endif
+}
+
 #endif
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 55936f9cb251..8d814cbc8109 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -33,6 +33,7 @@
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/kmsg_dump.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1530,8 +1531,7 @@ int kernel_kexec(void)
 		if (error)
 			goto Enable_cpus;
 		local_irq_disable();
-		/* Suspend system devices */
-		error = sysdev_suspend(PMSG_FREEZE);
+		error = syscore_suspend();
 		if (error)
 			goto Enable_irqs;
 	} else
@@ -1546,7 +1546,7 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
-		sysdev_resume();
+		syscore_resume();
  Enable_irqs:
 		local_irq_enable();
  Enable_cpus:
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9cd0591c96a2..5ae0ff38425f 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -245,7 +245,6 @@ static void __call_usermodehelper(struct work_struct *work)
 	}
 }
 
-#ifdef CONFIG_PM_SLEEP
 /*
  * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
  * (used for preventing user land processes from being created after the user
@@ -301,6 +300,15 @@ void usermodehelper_enable(void)
 	usermodehelper_disabled = 0;
 }
 
+/**
+ * usermodehelper_is_disabled - check if new helpers are allowed to be started
+ */
+bool usermodehelper_is_disabled(void)
+{
+	return usermodehelper_disabled;
+}
+EXPORT_SYMBOL_GPL(usermodehelper_is_disabled);
+
 static void helper_lock(void)
 {
 	atomic_inc(&running_helpers);
@@ -312,12 +320,6 @@ static void helper_unlock(void)
 	if (atomic_dec_and_test(&running_helpers))
 		wake_up(&running_helpers_waitq);
 }
-#else /* CONFIG_PM_SLEEP */
-#define usermodehelper_disabled	0
-
-static inline void helper_lock(void) {}
-static inline void helper_unlock(void) {}
-#endif /* CONFIG_PM_SLEEP */
 
 /**
  * call_usermodehelper_setup - prepare to call a usermode helper
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 0b624e791805..3b053c04dd86 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -16,6 +16,7 @@
 #include <linux/kexec.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
+#include <linux/capability.h>
 
 #define KERNEL_ATTR_RO(_name) \
 static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
@@ -131,6 +132,14 @@ KERNEL_ATTR_RO(vmcoreinfo);
 
 #endif /* CONFIG_KEXEC */
 
+/* whether file capabilities are enabled */
+static ssize_t fscaps_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", file_caps_enabled);
+}
+KERNEL_ATTR_RO(fscaps);
+
 /*
  * Make /sys/kernel/notes give the raw contents of our kernel .notes section.
  */
@@ -158,6 +167,7 @@ struct kobject *kernel_kobj;
 EXPORT_SYMBOL_GPL(kernel_kobj);
 
 static struct attribute * kernel_attrs[] = {
+	&fscaps_attr.attr,
 #if defined(CONFIG_HOTPLUG)
 	&uevent_seqnum_attr.attr,
 	&uevent_helper_attr.attr,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 53a68956f131..63437d065ac8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
 	usage[i] = '\0';
 }
 
+static int __print_lock_name(struct lock_class *class)
+{
+	char str[KSYM_NAME_LEN];
+	const char *name;
+
+	name = class->name;
+	if (!name)
+		name = __get_key_name(class->key, str);
+
+	return printk("%s", name);
+}
+
 static void print_lock_name(struct lock_class *class)
 {
 	char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
@@ -1053,6 +1065,56 @@ print_circular_bug_entry(struct lock_list *target, int depth)
 	return 0;
 }
 
+static void
+print_circular_lock_scenario(struct held_lock *src,
+			     struct held_lock *tgt,
+			     struct lock_list *prt)
+{
+	struct lock_class *source = hlock_class(src);
+	struct lock_class *target = hlock_class(tgt);
+	struct lock_class *parent = prt->class;
+
+	/*
+	 * A direct locking problem is one where parent is the same
+	 * class as source: there is no intermediate lock, so all we
+	 * need to show is the deadlock scenario below.
+	 *
+	 * But if there is a chain instead, where parent is an
+	 * intermediate lock that is not the same class as the
+	 * source lock, then the lock chain
+	 * "source --> parent --> target" is printed first to
+	 * describe the problem. Otherwise we would need to show
+	 * a different CPU case for each link in the chain from
+	 * the source lock to the target lock.
+	 */
+	if (parent != source) {
+		printk("Chain exists of:\n  ");
+		__print_lock_name(source);
+		printk(" --> ");
+		__print_lock_name(parent);
+		printk(" --> ");
+		__print_lock_name(target);
+		printk("\n\n");
+	}
+
+	printk(" Possible unsafe locking scenario:\n\n");
+	printk("       CPU0                    CPU1\n");
+	printk("       ----                    ----\n");
+	printk("  lock(");
+	__print_lock_name(target);
+	printk(");\n");
+	printk("                               lock(");
+	__print_lock_name(parent);
+	printk(");\n");
+	printk("                               lock(");
+	__print_lock_name(target);
+	printk(");\n");
+	printk("  lock(");
+	__print_lock_name(source);
+	printk(");\n");
+	printk("\n *** DEADLOCK ***\n\n");
+}
+
 /*
  * When a circular dependency is detected, print the
  * header first:
@@ -1096,6 +1158,7 @@ static noinline int print_circular_bug(struct lock_list *this,
 {
 	struct task_struct *curr = current;
 	struct lock_list *parent;
+	struct lock_list *first_parent;
 	int depth;
 
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
@@ -1109,6 +1172,7 @@ static noinline int print_circular_bug(struct lock_list *this,
 	print_circular_bug_header(target, depth, check_src, check_tgt);
 
 	parent = get_lock_parent(target);
+	first_parent = parent;
 
 	while (parent) {
 		print_circular_bug_entry(parent, --depth);
@@ -1116,6 +1180,9 @@ static noinline int print_circular_bug(struct lock_list *this,
 	}
 
 	printk("\nother info that might help us debug this:\n\n");
+	print_circular_lock_scenario(check_src, check_tgt,
+				     first_parent);
+
 	lockdep_print_held_locks(curr);
 
 	printk("\nstack backtrace:\n");
@@ -1314,7 +1381,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 		printk("\n");
 
 		if (depth == 0 && (entry != root)) {
-			printk("lockdep:%s bad BFS generated tree\n", __func__);
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
 			break;
 		}
 
@@ -1325,6 +1392,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 	return;
 }
 
+static void
+print_irq_lock_scenario(struct lock_list *safe_entry,
+			struct lock_list *unsafe_entry,
+			struct lock_class *prev_class,
+			struct lock_class *next_class)
+{
+	struct lock_class *safe_class = safe_entry->class;
+	struct lock_class *unsafe_class = unsafe_entry->class;
+	struct lock_class *middle_class = prev_class;
+
+	if (middle_class == safe_class)
+		middle_class = next_class;
+
+	/*
+	 * A direct locking problem where unsafe_class lock is taken
+	 * directly by safe_class lock, then all we need to show
+	 * is the deadlock scenario, as it is obvious that the
+	 * unsafe lock is taken under the safe lock.
+	 *
+	 * But if there is a chain instead, where the safe lock takes
+	 * an intermediate lock (middle_class) where this lock is
+	 * not the same as the safe lock, then the lock chain is
+	 * used to describe the problem. Otherwise we would need
+	 * to show a different CPU case for each link in the chain
+	 * from the safe_class lock to the unsafe_class lock.
+	 */
+	if (middle_class != unsafe_class) {
+		printk("Chain exists of:\n  ");
+		__print_lock_name(safe_class);
+		printk(" --> ");
+		__print_lock_name(middle_class);
+		printk(" --> ");
+		__print_lock_name(unsafe_class);
+		printk("\n\n");
+	}
+
+	printk(" Possible interrupt unsafe locking scenario:\n\n");
+	printk("       CPU0                    CPU1\n");
+	printk("       ----                    ----\n");
+	printk("  lock(");
+	__print_lock_name(unsafe_class);
+	printk(");\n");
+	printk("                               local_irq_disable();\n");
+	printk("                               lock(");
+	__print_lock_name(safe_class);
+	printk(");\n");
+	printk("                               lock(");
+	__print_lock_name(middle_class);
+	printk(");\n");
+	printk("  <Interrupt>\n");
+	printk("    lock(");
+	__print_lock_name(safe_class);
+	printk(");\n");
+	printk("\n *** DEADLOCK ***\n\n");
+}
+
 static int
 print_bad_irq_dependency(struct task_struct *curr,
 			 struct lock_list *prev_root,
@@ -1376,6 +1499,9 @@ print_bad_irq_dependency(struct task_struct *curr,
 	print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
 
 	printk("\nother info that might help us debug this:\n\n");
+	print_irq_lock_scenario(backwards_entry, forwards_entry,
+				hlock_class(prev), hlock_class(next));
+
 	lockdep_print_held_locks(curr);
 
 	printk("\nthe dependencies between %s-irq-safe lock", irqclass);
@@ -1539,6 +1665,26 @@ static inline void inc_chains(void)
 
 #endif
 
+static void
+print_deadlock_scenario(struct held_lock *nxt,
+			     struct held_lock *prv)
+{
+	struct lock_class *next = hlock_class(nxt);
+	struct lock_class *prev = hlock_class(prv);
+
+	printk(" Possible unsafe locking scenario:\n\n");
+	printk("       CPU0\n");
+	printk("       ----\n");
+	printk("  lock(");
+	__print_lock_name(prev);
+	printk(");\n");
+	printk("  lock(");
+	__print_lock_name(next);
+	printk(");\n");
+	printk("\n *** DEADLOCK ***\n\n");
+	printk(" May be due to missing lock nesting notation\n\n");
+}
+
 static int
 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 		   struct held_lock *next)
@@ -1557,6 +1703,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 	print_lock(prev);
 
 	printk("\nother info that might help us debug this:\n");
+	print_deadlock_scenario(next, prev);
 	lockdep_print_held_locks(curr);
 
 	printk("\nstack backtrace:\n");
@@ -1826,7 +1973,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	struct list_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
 	struct held_lock *hlock_curr, *hlock_next;
-	int i, j, n, cn;
+	int i, j;
 
 	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
 		return 0;
@@ -1886,15 +2033,9 @@ cache_hit:
 	}
 	i++;
 	chain->depth = curr->lockdep_depth + 1 - i;
-	cn = nr_chain_hlocks;
-	while (cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS) {
-		n = cmpxchg(&nr_chain_hlocks, cn, cn + chain->depth);
-		if (n == cn)
-			break;
-		cn = n;
-	}
-	if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
-		chain->base = cn;
+	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
+		chain->base = nr_chain_hlocks;
+		nr_chain_hlocks += chain->depth;
 		for (j = 0; j < chain->depth - 1; j++, i++) {
 			int lock_id = curr->held_locks[i].class_idx - 1;
 			chain_hlocks[chain->base + j] = lock_id;
@@ -2011,6 +2152,24 @@ static void check_chain_key(struct task_struct *curr)
 #endif
 }
 
+static void
+print_usage_bug_scenario(struct held_lock *lock)
+{
+	struct lock_class *class = hlock_class(lock);
+
+	printk(" Possible unsafe locking scenario:\n\n");
+	printk("       CPU0\n");
+	printk("       ----\n");
+	printk("  lock(");
+	__print_lock_name(class);
+	printk(");\n");
+	printk("  <Interrupt>\n");
+	printk("    lock(");
+	__print_lock_name(class);
+	printk(");\n");
+	printk("\n *** DEADLOCK ***\n\n");
+}
+
 static int
 print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
@@ -2039,6 +2198,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 
 	print_irqtrace_events(curr);
 	printk("\nother info that might help us debug this:\n");
+	print_usage_bug_scenario(this);
+
 	lockdep_print_held_locks(curr);
 
 	printk("\nstack backtrace:\n");
@@ -2073,6 +2234,10 @@ print_irq_inversion_bug(struct task_struct *curr,
 			struct held_lock *this, int forwards,
 			const char *irqclass)
 {
+	struct lock_list *entry = other;
+	struct lock_list *middle = NULL;
+	int depth;
+
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
@@ -2091,6 +2256,25 @@ print_irq_inversion_bug(struct task_struct *curr,
 	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
 
 	printk("\nother info that might help us debug this:\n");
+
+	/* Find a middle lock (if one exists) */
+	depth = get_lock_depth(other);
+	do {
+		if (depth == 0 && (entry != root)) {
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			break;
+		}
+		middle = entry;
+		entry = get_lock_parent(entry);
+		depth--;
+	} while (entry && entry != root && (depth >= 0));
+	if (forwards)
+		print_irq_lock_scenario(root, other,
+			middle ? middle->class : root->class, other->class);
+	else
+		print_irq_lock_scenario(other, root,
+			middle ? middle->class : other->class, root->class);
+
 	lockdep_print_held_locks(curr);
 
 	printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
diff --git a/kernel/module.c b/kernel/module.c
index d5938a5c19c4..22879725678d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -57,6 +57,7 @@
 #include <linux/kmemleak.h>
 #include <linux/jump_label.h>
 #include <linux/pfn.h>
+#include <linux/bsearch.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
@@ -240,23 +241,24 @@ static bool each_symbol_in_section(const struct symsearch *arr,
 				   struct module *owner,
 				   bool (*fn)(const struct symsearch *syms,
 					      struct module *owner,
-					      unsigned int symnum, void *data),
+					      void *data),
 				   void *data)
 {
-	unsigned int i, j;
+	unsigned int j;
 
 	for (j = 0; j < arrsize; j++) {
-		for (i = 0; i < arr[j].stop - arr[j].start; i++)
-			if (fn(&arr[j], owner, i, data))
-				return true;
+		if (fn(&arr[j], owner, data))
+			return true;
 	}
 
 	return false;
 }
 
 /* Returns true as soon as fn returns true, otherwise false. */
-bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
-			    unsigned int symnum, void *data), void *data)
+bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
+				    struct module *owner,
+				    void *data),
+			 void *data)
 {
 	struct module *mod;
 	static const struct symsearch arr[] = {
@@ -309,7 +311,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
 	}
 	return false;
 }
-EXPORT_SYMBOL_GPL(each_symbol);
+EXPORT_SYMBOL_GPL(each_symbol_section);
 
 struct find_symbol_arg {
 	/* Input */
@@ -323,15 +325,12 @@ struct find_symbol_arg {
 	const struct kernel_symbol *sym;
 };
 
-static bool find_symbol_in_section(const struct symsearch *syms,
-				   struct module *owner,
-				   unsigned int symnum, void *data)
+static bool check_symbol(const struct symsearch *syms,
+				 struct module *owner,
+				 unsigned int symnum, void *data)
 {
 	struct find_symbol_arg *fsa = data;
 
-	if (strcmp(syms->start[symnum].name, fsa->name) != 0)
-		return false;
-
 	if (!fsa->gplok) {
 		if (syms->licence == GPL_ONLY)
 			return false;
@@ -365,6 +364,30 @@ static bool find_symbol_in_section(const struct symsearch *syms,
 	return true;
 }
 
+static int cmp_name(const void *va, const void *vb)
+{
+	const char *a;
+	const struct kernel_symbol *b;
+	a = va; b = vb;
+	return strcmp(a, b->name);
+}
+
+static bool find_symbol_in_section(const struct symsearch *syms,
+				   struct module *owner,
+				   void *data)
+{
+	struct find_symbol_arg *fsa = data;
+	struct kernel_symbol *sym;
+
+	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
+			sizeof(struct kernel_symbol), cmp_name);
+
+	if (sym != NULL && check_symbol(syms, owner, sym - syms->start, data))
+		return true;
+
+	return false;
+}
+
 /* Find a symbol and return it, along with, (optional) crc and
  * (optional) module which owns it.  Needs preempt disabled or module_mutex. */
 const struct kernel_symbol *find_symbol(const char *name,
@@ -379,7 +402,7 @@ const struct kernel_symbol *find_symbol(const char *name,
 	fsa.gplok = gplok;
 	fsa.warn = warn;
 
-	if (each_symbol(find_symbol_in_section, &fsa)) {
+	if (each_symbol_section(find_symbol_in_section, &fsa)) {
 		if (owner)
 			*owner = fsa.owner;
 		if (crc)
@@ -1607,27 +1630,28 @@ static void set_section_ro_nx(void *base,
 	}
 }
 
-/* Setting memory back to RW+NX before releasing it */
-void unset_section_ro_nx(struct module *mod, void *module_region)
+static void unset_module_core_ro_nx(struct module *mod)
 {
-	unsigned long total_pages;
-
-	if (mod->module_core == module_region) {
-		/* Set core as NX+RW */
-		total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
-		set_memory_nx((unsigned long)mod->module_core, total_pages);
-		set_memory_rw((unsigned long)mod->module_core, total_pages);
+	set_page_attributes(mod->module_core + mod->core_text_size,
+		mod->module_core + mod->core_size,
+		set_memory_x);
+	set_page_attributes(mod->module_core,
+		mod->module_core + mod->core_ro_size,
+		set_memory_rw);
+}
 
-	} else if (mod->module_init == module_region) {
-		/* Set init as NX+RW */
-		total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
-		set_memory_nx((unsigned long)mod->module_init, total_pages);
-		set_memory_rw((unsigned long)mod->module_init, total_pages);
-	}
+static void unset_module_init_ro_nx(struct module *mod)
+{
+	set_page_attributes(mod->module_init + mod->init_text_size,
+		mod->module_init + mod->init_size,
+		set_memory_x);
+	set_page_attributes(mod->module_init,
+		mod->module_init + mod->init_ro_size,
+		set_memory_rw);
 }
 
 /* Iterate through all modules and set each module's text as RW */
-void set_all_modules_text_rw()
+void set_all_modules_text_rw(void)
 {
 	struct module *mod;
 
@@ -1648,7 +1672,7 @@ void set_all_modules_text_rw()
 }
 
 /* Iterate through all modules and set each module's text as RO */
-void set_all_modules_text_ro()
+void set_all_modules_text_ro(void)
 {
 	struct module *mod;
 
@@ -1669,7 +1693,8 @@ void set_all_modules_text_ro()
 }
 #else
 static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
-static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
+static void unset_module_core_ro_nx(struct module *mod) { }
+static void unset_module_init_ro_nx(struct module *mod) { }
 #endif
 
 /* Free a module, remove from lists, etc. */
@@ -1696,7 +1721,7 @@ static void free_module(struct module *mod)
 	destroy_params(mod->kp, mod->num_kp);
 
 	/* This may be NULL, but that's OK */
-	unset_section_ro_nx(mod, mod->module_init);
+	unset_module_init_ro_nx(mod);
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
 	percpu_modfree(mod);
@@ -1705,7 +1730,7 @@ static void free_module(struct module *mod)
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
 	/* Finally, free the core (containing the module structure) */
-	unset_section_ro_nx(mod, mod->module_core);
+	unset_module_core_ro_nx(mod);
 	module_free(mod, mod->module_core);
 
 #ifdef CONFIG_MPU
@@ -2030,11 +2055,8 @@ static const struct kernel_symbol *lookup_symbol(const char *name,
 	const struct kernel_symbol *start,
 	const struct kernel_symbol *stop)
 {
-	const struct kernel_symbol *ks = start;
-	for (; ks < stop; ks++)
-		if (strcmp(ks->name, name) == 0)
-			return ks;
-	return NULL;
+	return bsearch(name, start, stop - start,
+			sizeof(struct kernel_symbol), cmp_name);
 }
 
 static int is_exported(const char *name, unsigned long value,
@@ -2931,10 +2953,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
-	unset_section_ro_nx(mod, mod->module_init);
+	unset_module_init_ro_nx(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
+	mod->init_ro_size = 0;
 	mod->init_text_size = 0;
 	mutex_unlock(&module_mutex);
 
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index ec815a960b5d..73da83aff418 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
 		return;
 
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+	DEBUG_LOCKS_WARN_ON(lock->owner != current);
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	mutex_clear_owner(lock);
 }
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 57d527a16f9d..0799fd3e4cfa 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index c4195fa98900..2c938e2337cd 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -160,14 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 */
 
 	for (;;) {
-		struct thread_info *owner;
-
-		/*
-		 * If we own the BKL, then don't spin. The owner of
-		 * the mutex might be waiting on us to release the BKL.
-		 */
-		if (unlikely(current->lock_depth >= 0))
-			break;
+		struct task_struct *owner;
 
 		/*
 		 * If there's an owner, wait for it to either
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 67578ca48f94..4115fbf83b12 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -19,7 +19,7 @@
 #ifdef CONFIG_SMP
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
diff --git a/kernel/params.c b/kernel/params.c
index 7ab388a48a2e..ed72e1330862 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -297,21 +297,15 @@ EXPORT_SYMBOL(param_ops_charp);
 int param_set_bool(const char *val, const struct kernel_param *kp)
 {
 	bool v;
+	int ret;
 
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
 	/* One of =[yYnN01] */
-	switch (val[0]) {
-	case 'y': case 'Y': case '1':
-		v = true;
-		break;
-	case 'n': case 'N': case '0':
-		v = false;
-		break;
-	default:
-		return -EINVAL;
-	}
+	ret = strtobool(val, &v);
+	if (ret)
+		return ret;
 
 	if (kp->flags & KPARAM_ISBOOL)
 		*(bool *)kp->arg = v;
@@ -821,15 +815,18 @@ ssize_t __modver_version_show(struct module_attribute *mattr,
 	return sprintf(buf, "%s\n", vattr->version);
 }
 
-extern struct module_version_attribute __start___modver[], __stop___modver[];
+extern const struct module_version_attribute *__start___modver[];
+extern const struct module_version_attribute *__stop___modver[];
 
 static void __init version_sysfs_builtin(void)
 {
-	const struct module_version_attribute *vattr;
+	const struct module_version_attribute **p;
 	struct module_kobject *mk;
 	int err;
 
-	for (vattr = __start___modver; vattr < __stop___modver; vattr++) {
+	for (p = __start___modver; p < __stop___modver; p++) {
+		const struct module_version_attribute *vattr = *p;
+
 		mk = locate_module_kobject(vattr->module_name);
 		if (mk) {
 			err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
diff --git a/kernel/pid.c b/kernel/pid.c
index 02f221274265..57a8346a270e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 	return -1;
 }
 
-int next_pidmap(struct pid_namespace *pid_ns, int last)
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
 {
 	int offset;
 	struct pidmap *map, *end;
 
+	if (last >= PID_MAX_LIMIT)
+		return -1;
+
 	offset = (last + 1) & BITS_PER_PAGE_MASK;
 	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
 	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 4603f08dc47b..87f4d24b55b0 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,9 +18,13 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HIBERNATE_CALLBACKS
+	bool
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on SWAP && ARCH_HIBERNATION_POSSIBLE
+	select HIBERNATE_CALLBACKS
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
 	---help---
@@ -85,7 +89,7 @@ config PM_STD_PARTITION
 
 config PM_SLEEP
 	def_bool y
-	depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
+	depends on SUSPEND || HIBERNATE_CALLBACKS
 
 config PM_SLEEP_SMP
 	def_bool y
@@ -121,12 +125,6 @@ config PM_DEBUG
 	code. This is helpful when debugging and reporting PM bugs, like
 	suspend support.
 
-config PM_VERBOSE
-	bool "Verbose Power Management debugging"
-	depends on PM_DEBUG
-	---help---
-	This option enables verbose messages from the Power Management code.
-
 config PM_ADVANCED_DEBUG
 	bool "Extra PM attributes in sysfs for low-level debugging/testing"
 	depends on PM_DEBUG
@@ -225,3 +223,7 @@ config PM_OPP
 	  representing individual voltage domains and provides SOC
 	  implementations a ready to use framework to manage OPPs.
 	  For more information, read <file:Documentation/power/opp.txt>
+
+config PM_RUNTIME_CLK
+	def_bool y
+	depends on PM_RUNTIME && HAVE_CLK
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aeabd26e3342..f9bec56d8825 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -272,9 +272,7 @@ static int create_image(int platform_mode)
 
 	local_irq_disable();
 
-	error = sysdev_suspend(PMSG_FREEZE);
-	if (!error)
-		error = syscore_suspend();
+	error = syscore_suspend();
 	if (error) {
 		printk(KERN_ERR "PM: Some system devices failed to power down, "
 			"aborting hibernation\n");
@@ -299,7 +297,6 @@ static int create_image(int platform_mode)
 
  Power_up:
 	syscore_resume();
-	sysdev_resume();
 	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
@@ -330,20 +327,25 @@ static int create_image(int platform_mode)
 
 int hibernation_snapshot(int platform_mode)
 {
+	pm_message_t msg = PMSG_RECOVER;
 	int error;
 
 	error = platform_begin(platform_mode);
 	if (error)
 		goto Close;
 
+	error = dpm_prepare(PMSG_FREEZE);
+	if (error)
+		goto Complete_devices;
+
 	/* Preallocate image memory before shutting down devices. */
 	error = hibernate_preallocate_memory();
 	if (error)
-		goto Close;
+		goto Complete_devices;
 
 	suspend_console();
 	pm_restrict_gfp_mask();
-	error = dpm_suspend_start(PMSG_FREEZE);
+	error = dpm_suspend(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
 
@@ -361,13 +363,17 @@ int hibernation_snapshot(int platform_mode)
 	if (error || !in_suspend)
 		swsusp_free();
 
-	dpm_resume_end(in_suspend ?
-		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+	msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE;
+	dpm_resume(msg);
 
 	if (error || !in_suspend)
 		pm_restore_gfp_mask();
 
 	resume_console();
+
+ Complete_devices:
+	dpm_complete(msg);
+
  Close:
 	platform_end(platform_mode);
 	return error;
@@ -406,9 +412,7 @@ static int resume_target_kernel(bool platform_mode)
 
 	local_irq_disable();
 
-	error = sysdev_suspend(PMSG_QUIESCE);
-	if (!error)
-		error = syscore_suspend();
+	error = syscore_suspend();
 	if (error)
 		goto Enable_irqs;
 
@@ -436,7 +440,6 @@ static int resume_target_kernel(bool platform_mode)
 	touch_softlockup_watchdog();
 
 	syscore_resume();
-	sysdev_resume();
 
  Enable_irqs:
 	local_irq_enable();
@@ -522,7 +525,6 @@ int hibernation_platform_enter(void)
 		goto Platform_finish;
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_HIBERNATE);
 	syscore_suspend();
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
@@ -535,7 +537,6 @@ int hibernation_platform_enter(void)
 
  Power_up:
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 	enable_nonboot_cpus();
 
@@ -976,10 +977,33 @@ static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *att
 
 power_attr(image_size);
 
+static ssize_t reserved_size_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", reserved_size);
+}
+
+static ssize_t reserved_size_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	unsigned long size;
+
+	if (sscanf(buf, "%lu", &size) == 1) {
+		reserved_size = size;
+		return n;
+	}
+
+	return -EINVAL;
+}
+
+power_attr(reserved_size);
+
 static struct attribute * g[] = {
 	&disk_attr.attr,
 	&resume_attr.attr,
 	&image_size_attr.attr,
+	&reserved_size_attr.attr,
 	NULL,
 };
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index de9aef8742f4..2981af4ce7cb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -337,6 +337,7 @@ static int __init pm_init(void)
 	if (error)
 		return error;
 	hibernate_image_size_init();
+	hibernate_reserved_size_init();
 	power_kobj = kobject_create_and_add("power", NULL);
 	if (!power_kobj)
 		return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 03634be55f62..9a00a0a26280 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -15,6 +15,7 @@ struct swsusp_info {
 
 #ifdef CONFIG_HIBERNATION
 /* kernel/power/snapshot.c */
+extern void __init hibernate_reserved_size_init(void);
 extern void __init hibernate_image_size_init(void);
 
 #ifdef CONFIG_ARCH_HIBERNATION_HEADER
@@ -55,6 +56,7 @@ extern int hibernation_platform_enter(void);
 
 #else /* !CONFIG_HIBERNATION */
 
+static inline void hibernate_reserved_size_init(void) {}
 static inline void hibernate_image_size_init(void) {}
 #endif /* !CONFIG_HIBERNATION */
 
@@ -72,6 +74,8 @@ static struct kobj_attribute _name##_attr = {	\
 
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
+/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */
+extern unsigned long reserved_size;
 extern int in_suspend;
 extern dev_t swsusp_resume_device;
 extern sector_t swsusp_resume_block;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index ca0aacc24874..ace55889f702 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -41,16 +41,28 @@ static void swsusp_set_page_forbidden(struct page *);
 static void swsusp_unset_page_forbidden(struct page *);
 
 /*
+ * Number of bytes to reserve for memory allocations made by device drivers
+ * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
+ * cause image creation to fail (tunable via /sys/power/reserved_size).
+ */
+unsigned long reserved_size;
+
+void __init hibernate_reserved_size_init(void)
+{
+	reserved_size = SPARE_PAGES * PAGE_SIZE;
+}
+
+/*
  * Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, the image creating code will do its best to
- * ensure the image size will not exceed N bytes, but if that is
- * impossible, it will try to create the smallest image possible.
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N bytes, but if that is impossible, it will
+ * try to create the smallest image possible.
  */
 unsigned long image_size;
 
 void __init hibernate_image_size_init(void)
 {
-	image_size = (totalram_pages / 3) * PAGE_SIZE;
+	image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
 }
 
 /* List of PBEs needed for restoring the pages that were allocated before
@@ -1263,11 +1275,13 @@ static unsigned long minimum_image_size(unsigned long saveable)
  * frame in use.  We also need a number of page frames to be free during
  * hibernation for allocations made while saving the image and for device
  * drivers, in case they need to allocate memory from their hibernation
- * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES,
- * respectively, both of which are rough estimates).  To make this happen, we
- * compute the total number of available page frames and allocate at least
+ * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
+ * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
+ * /sys/power/reserved_size), respectively).  To make this happen, we compute
+ * the total number of available page frames and allocate at least
  *
- * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES
+ * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
+ *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
  *
  * of them, which corresponds to the maximum size of a hibernation image.
  *
@@ -1322,7 +1336,8 @@ int hibernate_preallocate_memory(void)
 	count -= totalreserve_pages;
 
 	/* Compute the maximum number of saveable pages to leave in memory. */
-	max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
+	max_size = (count - (size + PAGES_FOR_IO)) / 2
+			- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
 	/* Compute the desired number of image pages specified by image_size. */
 	size = DIV_ROUND_UP(image_size, PAGE_SIZE);
 	if (size > max_size)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 2814c32aed51..1c41ba215419 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,16 +163,13 @@ static int suspend_enter(suspend_state_t state)
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
 
-	error = sysdev_suspend(PMSG_SUSPEND);
-	if (!error)
-		error = syscore_suspend();
+	error = syscore_suspend();
 	if (!error) {
 		if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
 			error = suspend_ops->enter(state);
 			events_check_enabled = false;
 		}
 		syscore_resume();
-		sysdev_resume();
 	}
 
 	arch_suspend_enable_irqs();
@@ -213,7 +210,6 @@ int suspend_devices_and_enter(suspend_state_t state)
 			goto Close;
 	}
 	suspend_console();
-	pm_restrict_gfp_mask();
 	suspend_test_start();
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
@@ -224,13 +220,12 @@ int suspend_devices_and_enter(suspend_state_t state)
 	if (suspend_test(TEST_DEVICES))
 		goto Recover_platform;
 
-	suspend_enter(state);
+	error = suspend_enter(state);
 
  Resume_devices:
 	suspend_test_start();
 	dpm_resume_end(PMSG_RESUME);
 	suspend_test_finish("resume devices");
-	pm_restore_gfp_mask();
 	resume_console();
  Close:
 	if (suspend_ops->end)
@@ -291,7 +286,9 @@ int enter_state(suspend_state_t state)
 		goto Finish;
 
 	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+	pm_restrict_gfp_mask();
 	error = suspend_devices_and_enter(state);
+	pm_restore_gfp_mask();
 
  Finish:
 	pr_debug("PM: Finishing wakeup.\n");
diff --git a/kernel/power/user.c b/kernel/power/user.c
index c36c3b9e8a84..7d02d33be699 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -135,8 +135,10 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 	free_basic_memory_bitmaps();
 	data = filp->private_data;
 	free_all_swap_pages(data->swap);
-	if (data->frozen)
+	if (data->frozen) {
+		pm_restore_gfp_mask();
 		thaw_processes();
+	}
 	pm_notifier_call_chain(data->mode == O_RDONLY ?
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
@@ -379,6 +381,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		 * PM_HIBERNATION_PREPARE
 		 */
 		error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+		data->ready = 0;
 		break;
 
 	case SNAPSHOT_PLATFORM_SUPPORT:
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0fc1eed28d27..7a81fc071344 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
 
 
 /*
@@ -37,35 +38,33 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 	child->parent = new_parent;
 }
 
-/*
- * Turn a tracing stop into a normal stop now, since with no tracer there
- * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
- * signal sent that would resume the child, but didn't because it was in
- * TASK_TRACED, resume it now.
- * Requires that irqs be disabled.
- */
-static void ptrace_untrace(struct task_struct *child)
-{
-	spin_lock(&child->sighand->siglock);
-	if (task_is_traced(child)) {
-		/*
-		 * If the group stop is completed or in progress,
-		 * this thread was already counted as stopped.
-		 */
-		if (child->signal->flags & SIGNAL_STOP_STOPPED ||
-		    child->signal->group_stop_count)
-			__set_task_state(child, TASK_STOPPED);
-		else
-			signal_wake_up(child, 1);
-	}
-	spin_unlock(&child->sighand->siglock);
-}
-
-/*
- * unptrace a task: move it back to its original parent and
- * remove it from the ptrace list.
+/**
+ * __ptrace_unlink - unlink ptracee and restore its execution state
+ * @child: ptracee to be unlinked
  *
- * Must be called with the tasklist lock write-held.
+ * Remove @child from the ptrace list, move it back to the original parent,
+ * and restore the execution state so that it conforms to the group stop
+ * state.
+ *
+ * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
+ * exiting.  For PTRACE_DETACH, unless the ptracee has been killed between
+ * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
+ * If the ptracer is exiting, the ptracee can be in any state.
+ *
+ * After detach, the ptracee should be in a state which conforms to the
+ * group stop.  If the group is stopped or in the process of stopping, the
+ * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
+ * up from TASK_TRACED.
+ *
+ * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
+ * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
+ * to but in the opposite direction of what happens while attaching to a
+ * stopped task.  However, in this direction, the intermediate RUNNING
+ * state is not hidden even from the current ptracer and if it immediately
+ * re-attaches and performs a WNOHANG wait(2), it may fail.
+ *
+ * CONTEXT:
+ * write_lock_irq(tasklist_lock)
  */
 void __ptrace_unlink(struct task_struct *child)
 {
@@ -75,8 +74,27 @@ void __ptrace_unlink(struct task_struct *child)
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
 
-	if (task_is_traced(child))
-		ptrace_untrace(child);
+	spin_lock(&child->sighand->siglock);
+
+	/*
+	 * Reinstate GROUP_STOP_PENDING if group stop is in effect and
+	 * @child isn't dead.
+	 */
+	if (!(child->flags & PF_EXITING) &&
+	    (child->signal->flags & SIGNAL_STOP_STOPPED ||
+	     child->signal->group_stop_count))
+		child->group_stop |= GROUP_STOP_PENDING;
+
+	/*
+	 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
+	 * @child in the butt.  Note that @resume should be used iff @child
+	 * is in TASK_TRACED; otherwise, we might unduly disrupt
+	 * TASK_KILLABLE sleeps.
+	 */
+	if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
+		signal_wake_up(child, task_is_traced(child));
+
+	spin_unlock(&child->sighand->siglock);
 }
 
 /*
@@ -95,16 +113,14 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 	 */
 	read_lock(&tasklist_lock);
 	if ((child->ptrace & PT_PTRACED) && child->parent == current) {
-		ret = 0;
 		/*
 		 * child->sighand can't be NULL, release_task()
 		 * does ptrace_unlink() before __exit_signal().
 		 */
 		spin_lock_irq(&child->sighand->siglock);
-		if (task_is_stopped(child))
-			child->state = TASK_TRACED;
-		else if (!task_is_traced(child) && !kill)
-			ret = -ESRCH;
+		WARN_ON_ONCE(task_is_stopped(child));
+		if (task_is_traced(child) || kill)
+			ret = 0;
 		spin_unlock_irq(&child->sighand->siglock);
 	}
 	read_unlock(&tasklist_lock);
@@ -168,6 +184,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 
 static int ptrace_attach(struct task_struct *task)
 {
+	bool wait_trap = false;
 	int retval;
 
 	audit_ptrace(task);
@@ -207,12 +224,42 @@ static int ptrace_attach(struct task_struct *task)
 	__ptrace_link(task, current);
 	send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
 
+	spin_lock(&task->sighand->siglock);
+
+	/*
+	 * If the task is already STOPPED, set GROUP_STOP_PENDING and
+	 * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
+	 * will be cleared if the child completes the transition or any
+	 * event which clears the group stop states happens.  We'll wait
+	 * for the transition to complete before returning from this
+	 * function.
+	 *
+	 * This hides STOPPED -> RUNNING -> TRACED transition from the
+	 * attaching thread but a different thread in the same group can
+	 * still observe the transient RUNNING state.  IOW, if another
+	 * thread's WNOHANG wait(2) on the stopped tracee races against
+	 * ATTACH, the wait(2) may fail due to the transient RUNNING.
+	 *
+	 * The following task_is_stopped() test is safe as both transitions
+	 * in and out of STOPPED are protected by siglock.
+	 */
+	if (task_is_stopped(task)) {
+		task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
+		signal_wake_up(task, 1);
+		wait_trap = true;
+	}
+
+	spin_unlock(&task->sighand->siglock);
+
 	retval = 0;
 unlock_tasklist:
 	write_unlock_irq(&tasklist_lock);
 unlock_creds:
 	mutex_unlock(&task->signal->cred_guard_mutex);
 out:
+	if (wait_trap)
+		wait_event(current->signal->wait_chldexit,
+			   !(task->group_stop & GROUP_STOP_TRAPPING));
 	return retval;
 }
 
@@ -315,8 +362,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 	if (child->ptrace) {
 		child->exit_code = data;
 		dead = __ptrace_detach(current, child);
-		if (!child->exit_state)
-			wake_up_state(child, TASK_TRACED | TASK_STOPPED);
 	}
 	write_unlock_irq(&tasklist_lock);
 
@@ -879,3 +924,19 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 	return ret;
 }
 #endif	/* CONFIG_COMPAT */
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+int ptrace_get_breakpoints(struct task_struct *tsk)
+{
+	if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt))
+		return 0;
+
+	return -1;
+}
+
+void ptrace_put_breakpoints(struct task_struct *tsk)
+{
+	if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt))
+		flush_ptrace_hw_breakpoint(tsk);
+}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f3240e987928..7784bd216b6a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -142,10 +142,17 @@ static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
@@ -184,10 +191,17 @@ static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
@@ -214,15 +228,17 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
-		 * Note that the machinery to reliably determine whether
-		 * or not we are in an RCU read-side critical section
-		 * exists only in the preemptible RCU implementations
-		 * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
-		 * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0c343b9a46d5..7bbac7d0f5ab 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -35,15 +35,16 @@
 #include <linux/init.h>
 #include <linux/time.h>
 #include <linux/cpu.h>
+#include <linux/prefetch.h>
 
 /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
 static struct task_struct *rcu_kthread_task;
 static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
 static unsigned long have_rcu_kthread_work;
-static void invoke_rcu_kthread(void);
 
 /* Forward declarations for rcutiny_plugin.h. */
 struct rcu_ctrlblk;
+static void invoke_rcu_kthread(void);
 static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
 static int rcu_kthread(void *arg);
 static void __call_rcu(struct rcu_head *head,
@@ -79,36 +80,45 @@ void rcu_exit_nohz(void)
 #endif /* #ifdef CONFIG_NO_HZ */
 
 /*
- * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
- * Also disable irqs to avoid confusion due to interrupt handlers
+ * Helper function for rcu_sched_qs() and rcu_bh_qs().
+ * Callers must disable irqs to avoid confusion due to interrupt handlers
  * invoking call_rcu().
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	if (rcp->rcucblist != NULL &&
 	    rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
-		local_irq_restore(flags);
 		return 1;
 	}
-	local_irq_restore(flags);
 
 	return 0;
 }
 
 /*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_kthread(void)
+{
+	have_rcu_kthread_work = 1;
+	wake_up(&rcu_kthread_wq);
+}
+
+/*
  * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
  * are at it, given that any rcu quiescent state is also an rcu_bh
  * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
  */
 void rcu_sched_qs(int cpu)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
 	    rcu_qsctr_help(&rcu_bh_ctrlblk))
 		invoke_rcu_kthread();
+	local_irq_restore(flags);
 }
 
 /*
@@ -116,8 +126,12 @@ void rcu_sched_qs(int cpu)
  */
 void rcu_bh_qs(int cpu)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
 		invoke_rcu_kthread();
+	local_irq_restore(flags);
 }
 
 /*
@@ -167,7 +181,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
 		local_bh_disable();
-		list->func(list);
+		__rcu_reclaim(list);
 		local_bh_enable();
 		list = next;
 		RCU_TRACE(cb_count++);
@@ -208,20 +222,6 @@ static int rcu_kthread(void *arg)
 }
 
 /*
- * Wake up rcu_kthread() to process callbacks now eligible for invocation
- * or to boost readers.
- */
-static void invoke_rcu_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	have_rcu_kthread_work = 1;
-	wake_up(&rcu_kthread_wq);
-	local_irq_restore(flags);
-}
-
-/*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
  * Therefore, any legal call to synchronize_sched() is a quiescent
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3cb8e362e883..f259c676195f 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -100,23 +100,28 @@ struct rcu_preempt_ctrlblk {
 	u8 completed;		/* Last grace period completed. */
 				/*  If all three are equal, RCU is idle. */
 #ifdef CONFIG_RCU_BOOST
-	s8 boosted_this_gp;	/* Has boosting already happened? */
 	unsigned long boost_time; /* When to start boosting (jiffies) */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 #ifdef CONFIG_RCU_TRACE
 	unsigned long n_grace_periods;
 #ifdef CONFIG_RCU_BOOST
 	unsigned long n_tasks_boosted;
+				/* Total number of tasks boosted. */
 	unsigned long n_exp_boosts;
+				/* Number of tasks boosted for expedited GP. */
 	unsigned long n_normal_boosts;
-	unsigned long n_normal_balk_blkd_tasks;
-	unsigned long n_normal_balk_gp_tasks;
-	unsigned long n_normal_balk_boost_tasks;
-	unsigned long n_normal_balk_boosted;
-	unsigned long n_normal_balk_notyet;
-	unsigned long n_normal_balk_nos;
-	unsigned long n_exp_balk_blkd_tasks;
-	unsigned long n_exp_balk_nos;
+				/* Number of tasks boosted for normal GP. */
+	unsigned long n_balk_blkd_tasks;
+				/* Refused to boost: no blocked tasks. */
+	unsigned long n_balk_exp_gp_tasks;
+				/* Refused to boost: nothing blocking GP. */
+	unsigned long n_balk_boost_tasks;
+				/* Refused to boost: already boosting. */
+	unsigned long n_balk_notyet;
+				/* Refused to boost: not yet time. */
+	unsigned long n_balk_nos;
+				/* Refused to boost: not sure why, though. */
+				/*  This can happen due to race conditions. */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 #endif /* #ifdef CONFIG_RCU_TRACE */
 };
@@ -201,7 +206,6 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t)
 
 #ifdef CONFIG_RCU_BOOST
 static void rcu_initiate_boost_trace(void);
-static void rcu_initiate_exp_boost_trace(void);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
 /*
@@ -219,41 +223,21 @@ static void show_tiny_preempt_stats(struct seq_file *m)
 		   "N."[!rcu_preempt_ctrlblk.gp_tasks],
 		   "E."[!rcu_preempt_ctrlblk.exp_tasks]);
 #ifdef CONFIG_RCU_BOOST
-	seq_printf(m, "             ttb=%c btg=",
-		   "B."[!rcu_preempt_ctrlblk.boost_tasks]);
-	switch (rcu_preempt_ctrlblk.boosted_this_gp) {
-	case -1:
-		seq_puts(m, "exp");
-		break;
-	case 0:
-		seq_puts(m, "no");
-		break;
-	case 1:
-		seq_puts(m, "begun");
-		break;
-	case 2:
-		seq_puts(m, "done");
-		break;
-	default:
-		seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
-	}
-	seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+	seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+		   "             ",
+		   "B."[!rcu_preempt_ctrlblk.boost_tasks],
 		   rcu_preempt_ctrlblk.n_tasks_boosted,
 		   rcu_preempt_ctrlblk.n_exp_boosts,
 		   rcu_preempt_ctrlblk.n_normal_boosts,
 		   (int)(jiffies & 0xffff),
 		   (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
-	seq_printf(m, "             %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
-		   "normal balk",
-		   rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
-		   rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
-		   rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
-		   rcu_preempt_ctrlblk.n_normal_balk_boosted,
-		   rcu_preempt_ctrlblk.n_normal_balk_notyet,
-		   rcu_preempt_ctrlblk.n_normal_balk_nos);
-	seq_printf(m, "             exp balk: bt=%lu nos=%lu\n",
-		   rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
-		   rcu_preempt_ctrlblk.n_exp_balk_nos);
+	seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
+		   "             balk",
+		   rcu_preempt_ctrlblk.n_balk_blkd_tasks,
+		   rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
+		   rcu_preempt_ctrlblk.n_balk_boost_tasks,
+		   rcu_preempt_ctrlblk.n_balk_notyet,
+		   rcu_preempt_ctrlblk.n_balk_nos);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 }
 
@@ -271,25 +255,59 @@ static int rcu_boost(void)
 {
 	unsigned long flags;
 	struct rt_mutex mtx;
-	struct list_head *np;
 	struct task_struct *t;
+	struct list_head *tb;
 
-	if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+	if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+	    rcu_preempt_ctrlblk.exp_tasks == NULL)
 		return 0;  /* Nothing to boost. */
+
 	raw_local_irq_save(flags);
-	rcu_preempt_ctrlblk.boosted_this_gp++;
-	t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
-			 rcu_node_entry);
-	np = rcu_next_node_entry(t);
+
+	/*
+	 * Recheck with irqs disabled: all tasks in need of boosting
+	 * might exit their RCU read-side critical sections on their own
+	 * if we are preempted just before disabling irqs.
+	 */
+	if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+	    rcu_preempt_ctrlblk.exp_tasks == NULL) {
+		raw_local_irq_restore(flags);
+		return 0;
+	}
+
+	/*
+	 * Preferentially boost tasks blocking expedited grace periods.
+	 * This cannot starve the normal grace periods because a second
+	 * expedited grace period must boost all blocked tasks, including
+	 * those blocking the pre-existing normal grace period.
+	 */
+	if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
+		tb = rcu_preempt_ctrlblk.exp_tasks;
+		RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
+	} else {
+		tb = rcu_preempt_ctrlblk.boost_tasks;
+		RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
+	}
+	RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
+
+	/*
+	 * We boost task t by manufacturing an rt_mutex that appears to
+	 * be held by task t.  We leave a pointer to that rt_mutex where
+	 * task t can find it, and task t will release the mutex when it
+	 * exits its outermost RCU read-side critical section.  Then
+	 * simply acquiring this artificial rt_mutex will boost task
+	 * t's priority.  (Thanks to tglx for suggesting this approach!)
+	 */
+	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&mtx, t);
 	t->rcu_boost_mutex = &mtx;
 	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
 	raw_local_irq_restore(flags);
 	rt_mutex_lock(&mtx);
-	RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
-	rcu_preempt_ctrlblk.boosted_this_gp++;
-	rt_mutex_unlock(&mtx);
-	return rcu_preempt_ctrlblk.boost_tasks != NULL;
+	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
+
+	return rcu_preempt_ctrlblk.boost_tasks != NULL ||
+	       rcu_preempt_ctrlblk.exp_tasks != NULL;
 }
 
 /*
@@ -304,42 +322,25 @@ static int rcu_boost(void)
  */
 static int rcu_initiate_boost(void)
 {
-	if (!rcu_preempt_blocked_readers_cgp()) {
-		RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
+	if (!rcu_preempt_blocked_readers_cgp() &&
+	    rcu_preempt_ctrlblk.exp_tasks == NULL) {
+		RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
 		return 0;
 	}
-	if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
-	    rcu_preempt_ctrlblk.boost_tasks == NULL &&
-	    rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
-	    ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
-		rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+	if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
+	    (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+	     rcu_preempt_ctrlblk.boost_tasks == NULL &&
+	     ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
+		if (rcu_preempt_ctrlblk.exp_tasks == NULL)
+			rcu_preempt_ctrlblk.boost_tasks =
+				rcu_preempt_ctrlblk.gp_tasks;
 		invoke_rcu_kthread();
-		RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
 	} else
 		RCU_TRACE(rcu_initiate_boost_trace());
 	return 1;
 }
 
-/*
- * Initiate boosting for an expedited grace period.
- */
-static void rcu_initiate_expedited_boost(void)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
-		rcu_preempt_ctrlblk.boost_tasks =
-			rcu_preempt_ctrlblk.blkd_tasks.next;
-		rcu_preempt_ctrlblk.boosted_this_gp = -1;
-		invoke_rcu_kthread();
-		RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
-	} else
-		RCU_TRACE(rcu_initiate_exp_boost_trace());
-	raw_local_irq_restore(flags);
-}
-
-#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
 
 /*
  * Do priority-boost accounting for the start of a new grace period.
@@ -347,8 +348,6 @@ static void rcu_initiate_expedited_boost(void)
 static void rcu_preempt_boost_start_gp(void)
 {
 	rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
-	if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
-		rcu_preempt_ctrlblk.boosted_this_gp = 0;
 }
 
 #else /* #ifdef CONFIG_RCU_BOOST */
@@ -372,13 +371,6 @@ static int rcu_initiate_boost(void)
 }
 
 /*
- * If there is no RCU priority boosting, we don't initiate expedited boosting.
- */
-static void rcu_initiate_expedited_boost(void)
-{
-}
-
-/*
  * If there is no RCU priority boosting, nothing to do at grace-period start.
  */
 static void rcu_preempt_boost_start_gp(void)
@@ -418,7 +410,7 @@ static void rcu_preempt_cpu_qs(void)
 	if (!rcu_preempt_gp_in_progress())
 		return;
 	/*
-	 * Check up on boosting.  If there are no readers blocking the
+	 * Check up on boosting.  If there are readers blocking the
 	 * current grace period, leave.
 	 */
 	if (rcu_initiate_boost())
@@ -578,7 +570,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		empty = !rcu_preempt_blocked_readers_cgp();
 		empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
 		np = rcu_next_node_entry(t);
-		list_del(&t->rcu_node_entry);
+		list_del_init(&t->rcu_node_entry);
 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
 			rcu_preempt_ctrlblk.gp_tasks = np;
 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
@@ -587,7 +579,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
 			rcu_preempt_ctrlblk.boost_tasks = np;
 #endif /* #ifdef CONFIG_RCU_BOOST */
-		INIT_LIST_HEAD(&t->rcu_node_entry);
 
 		/*
 		 * If this was the last task on the current list, and if
@@ -812,13 +803,16 @@ void synchronize_rcu_expedited(void)
 	rpcp->exp_tasks = rpcp->blkd_tasks.next;
 	if (rpcp->exp_tasks == &rpcp->blkd_tasks)
 		rpcp->exp_tasks = NULL;
-	local_irq_restore(flags);
 
 	/* Wait for tail of ->blkd_tasks list to drain. */
-	if (rcu_preempted_readers_exp())
-		rcu_initiate_expedited_boost();
+	if (!rcu_preempted_readers_exp())
+		local_irq_restore(flags);
+	else {
+		rcu_initiate_boost();
+		local_irq_restore(flags);
 		wait_event(sync_rcu_preempt_exp_wq,
 			   !rcu_preempted_readers_exp());
+	}
 
 	/* Clean up and exit. */
 	barrier(); /* ensure expedited GP seen before counter increment. */
@@ -931,24 +925,17 @@ void __init rcu_scheduler_starting(void)
 
 static void rcu_initiate_boost_trace(void)
 {
-	if (rcu_preempt_ctrlblk.gp_tasks == NULL)
-		rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
+	if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
+		rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
+	else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
+		 rcu_preempt_ctrlblk.exp_tasks == NULL)
+		rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
 	else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
-		rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
-	else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
-		rcu_preempt_ctrlblk.n_normal_balk_boosted++;
+		rcu_preempt_ctrlblk.n_balk_boost_tasks++;
 	else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
-		rcu_preempt_ctrlblk.n_normal_balk_notyet++;
-	else
-		rcu_preempt_ctrlblk.n_normal_balk_nos++;
-}
-
-static void rcu_initiate_exp_boost_trace(void)
-{
-	if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
-		rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
+		rcu_preempt_ctrlblk.n_balk_notyet++;
 	else
-		rcu_preempt_ctrlblk.n_exp_balk_nos++;
+		rcu_preempt_ctrlblk.n_balk_nos++;
 }
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c224da41890c..2e138db03382 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -131,7 +131,7 @@ struct rcu_torture {
 
 static LIST_HEAD(rcu_torture_freelist);
 static struct rcu_torture __rcu *rcu_torture_current;
-static long rcu_torture_current_version;
+static unsigned long rcu_torture_current_version;
 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
 static DEFINE_SPINLOCK(rcu_torture_lock);
 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
@@ -146,8 +146,6 @@ static atomic_t n_rcu_torture_mberror;
 static atomic_t n_rcu_torture_error;
 static long n_rcu_torture_boost_ktrerror;
 static long n_rcu_torture_boost_rterror;
-static long n_rcu_torture_boost_allocerror;
-static long n_rcu_torture_boost_afferror;
 static long n_rcu_torture_boost_failure;
 static long n_rcu_torture_boosts;
 static long n_rcu_torture_timers;
@@ -163,11 +161,11 @@ static int stutter_pause_test;
 #endif
 int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
 
-#ifdef CONFIG_RCU_BOOST
+#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
 #define rcu_can_boost() 1
-#else /* #ifdef CONFIG_RCU_BOOST */
+#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
 #define rcu_can_boost() 0
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
+#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
 
 static unsigned long boost_starttime;	/* jiffies of next boost test start. */
 DEFINE_MUTEX(boost_mutex);		/* protect setting boost_starttime */
@@ -751,6 +749,7 @@ static int rcu_torture_boost(void *arg)
 		n_rcu_torture_boost_rterror++;
 	}
 
+	init_rcu_head_on_stack(&rbi.rcu);
 	/* Each pass through the following loop does one boost-test cycle. */
 	do {
 		/* Wait for the next test interval. */
@@ -810,6 +809,7 @@ checkwait:	rcu_stutter_wait("rcu_torture_boost");
 
 	/* Clean up and exit. */
 	VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+	destroy_rcu_head_on_stack(&rbi.rcu);
 	rcutorture_shutdown_absorb("rcu_torture_boost");
 	while (!kthread_should_stop() || rbi.inflight)
 		schedule_timeout_uninterruptible(1);
@@ -886,7 +886,7 @@ rcu_torture_writer(void *arg)
 			old_rp->rtort_pipe_count++;
 			cur_ops->deferred_free(old_rp);
 		}
-		rcu_torture_current_version++;
+		rcutorture_record_progress(++rcu_torture_current_version);
 		oldbatch = cur_ops->completed();
 		rcu_stutter_wait("rcu_torture_writer");
 	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1066,8 +1066,8 @@ rcu_torture_printk(char *page)
 	}
 	cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
 	cnt += sprintf(&page[cnt],
-		       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
-		       "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
+		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
+		       "rtmbe: %d rtbke: %ld rtbre: %ld "
 		       "rtbf: %ld rtb: %ld nt: %ld",
 		       rcu_torture_current,
 		       rcu_torture_current_version,
@@ -1078,16 +1078,12 @@ rcu_torture_printk(char *page)
 		       atomic_read(&n_rcu_torture_mberror),
 		       n_rcu_torture_boost_ktrerror,
 		       n_rcu_torture_boost_rterror,
-		       n_rcu_torture_boost_allocerror,
-		       n_rcu_torture_boost_afferror,
 		       n_rcu_torture_boost_failure,
 		       n_rcu_torture_boosts,
 		       n_rcu_torture_timers);
 	if (atomic_read(&n_rcu_torture_mberror) != 0 ||
 	    n_rcu_torture_boost_ktrerror != 0 ||
 	    n_rcu_torture_boost_rterror != 0 ||
-	    n_rcu_torture_boost_allocerror != 0 ||
-	    n_rcu_torture_boost_afferror != 0 ||
 	    n_rcu_torture_boost_failure != 0)
 		cnt += sprintf(&page[cnt], " !!!");
 	cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
@@ -1331,6 +1327,7 @@ rcu_torture_cleanup(void)
 	int i;
 
 	mutex_lock(&fullstop_mutex);
+	rcutorture_record_test_transition();
 	if (fullstop == FULLSTOP_SHUTDOWN) {
 		printk(KERN_WARNING /* but going down anyway, so... */
 		       "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
@@ -1486,8 +1483,6 @@ rcu_torture_init(void)
 	atomic_set(&n_rcu_torture_error, 0);
 	n_rcu_torture_boost_ktrerror = 0;
 	n_rcu_torture_boost_rterror = 0;
-	n_rcu_torture_boost_allocerror = 0;
-	n_rcu_torture_boost_afferror = 0;
 	n_rcu_torture_boost_failure = 0;
 	n_rcu_torture_boosts = 0;
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -1624,6 +1619,7 @@ rcu_torture_init(void)
 		}
 	}
 	register_reboot_notifier(&rcutorture_shutdown_nb);
+	rcutorture_record_test_transition();
 	mutex_unlock(&fullstop_mutex);
 	return 0;
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8e..f07d2f03181a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,9 @@
 #include <linux/mutex.h>
 #include <linux/time.h>
 #include <linux/kernel_stat.h>
+#include <linux/wait.h>
+#include <linux/kthread.h>
+#include <linux/prefetch.h>
 
 #include "rcutree.h"
 
@@ -79,10 +82,41 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+static struct rcu_state *rcu_state;
+
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
 /*
+ * Control variables for per-CPU and per-rcu_node kthreads.  These
+ * handle all flavors of RCU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
+DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
+static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
+DEFINE_PER_CPU(char, rcu_cpu_has_work);
+static char rcu_kthreads_spawnable;
+
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
+static void invoke_rcu_cpu_kthread(void);
+
+#define RCU_KTHREAD_PRIO 1	/* RT priority for per-CPU kthreads. */
+
+/*
+ * Track the rcutorture test sequence number and the update version
+ * number within a given test.  The rcutorture_testseq is incremented
+ * on every rcutorture module load and unload, so has an odd value
+ * when a test is running.  The rcutorture_vernum is set to zero
+ * when rcutorture starts and is incremented on each rcutorture update.
+ * These variables enable correlating rcutorture output with the
+ * RCU tracing information.
+ */
+unsigned long rcutorture_testseq;
+unsigned long rcutorture_vernum;
+
+/*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
  * structure's ->lock, but of course results can be subject to change.
@@ -124,6 +158,7 @@ void rcu_note_context_switch(int cpu)
 	rcu_sched_qs(cpu);
 	rcu_preempt_note_context_switch(cpu);
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
@@ -140,10 +175,8 @@ module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
+int rcu_cpu_stall_suppress __read_mostly;
 module_param(rcu_cpu_stall_suppress, int, 0644);
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
 static int rcu_pending(int cpu);
@@ -176,6 +209,31 @@ void rcu_bh_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
 /*
+ * Record the number of times rcutorture tests have been initiated and
+ * terminated.  This information allows the debugfs tracing stats to be
+ * correlated to the rcutorture messages, even when the rcutorture module
+ * is being repeatedly loaded and unloaded.  In other words, we cannot
+ * store this state in rcutorture itself.
+ */
+void rcutorture_record_test_transition(void)
+{
+	rcutorture_testseq++;
+	rcutorture_vernum = 0;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
+
+/*
+ * Count one more writer pass through the current rcutorture test (the
+ * vernum argument is currently ignored).  This is also used to correlate
+ * debugfs tracing stats with the rcutorture messages.
+ */
+void rcutorture_record_progress(unsigned long vernum)
+{
+	rcutorture_vernum++;
+}
+EXPORT_SYMBOL_GPL(rcutorture_record_progress);
+
+/*
  * Force a quiescent state for RCU-sched.
  */
 void rcu_sched_force_quiescent_state(void)
@@ -234,8 +292,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 		return 1;
 	}
 
-	/* If preemptable RCU, no point in sending reschedule IPI. */
-	if (rdp->preemptable)
+	/* If preemptible RCU, no point in sending reschedule IPI. */
+	if (rdp->preemptible)
 		return 0;
 
 	/* The CPU is online, so send it a reschedule IPI. */
@@ -450,8 +508,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
 int rcu_cpu_stall_suppress __read_mostly;
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -537,21 +593,24 @@ static void print_cpu_stall(struct rcu_state *rsp)
 
 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	long delta;
+	unsigned long j;
+	unsigned long js;
 	struct rcu_node *rnp;
 
 	if (rcu_cpu_stall_suppress)
 		return;
-	delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
+	j = ACCESS_ONCE(jiffies);
+	js = ACCESS_ONCE(rsp->jiffies_stall);
 	rnp = rdp->mynode;
-	if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) {
+	if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
 
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(rsp);
 
-	} else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) {
+	} else if (rcu_gp_in_progress(rsp) &&
+		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
 
-		/* They had two time units to dump stack, so complain. */
+		/* They had a few time units to dump stack, so complain. */
 		print_other_cpu_stall(rsp);
 	}
 }
@@ -587,26 +646,6 @@ static void __init check_cpu_stall_init(void)
 	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
 }
 
-#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
-static void record_gp_stall_check_time(struct rcu_state *rsp)
-{
-}
-
-static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-}
-
-void rcu_cpu_stall_reset(void)
-{
-}
-
-static void __init check_cpu_stall_init(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
 /*
  * Update CPU-local rcu_data state to record the newly noticed grace period.
  * This is used both when we started the grace period and when we notice
@@ -809,6 +848,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
 		rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
@@ -844,6 +884,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->completed = rsp->completed;
 		if (rnp == rdp->mynode)
 			rcu_start_gp_per_cpu(rsp, rnp, rdp);
+		rcu_preempt_boost_start_gp(rnp);
 		raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 	}
 
@@ -864,7 +905,12 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
+	unsigned long gp_duration;
+
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+	gp_duration = jiffies - rsp->gp_start;
+	if (gp_duration > rsp->gp_max)
+		rsp->gp_max = gp_duration;
 	rsp->completed = rsp->gpnum;
 	rsp->signaled = RCU_GP_IDLE;
 	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
@@ -894,7 +940,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		rnp->qsmask &= ~mask;
-		if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 
 			/* Other bits still set at this level, so done. */
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1037,6 +1083,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 /*
  * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
  * and move all callbacks from the outgoing CPU to the current one.
+ * There can only be one CPU hotplug operation at a time, so no other
+ * CPU can be attempting to update rcu_cpu_kthread_task.
  */
 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 {
@@ -1045,6 +1093,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp;
+	struct task_struct *t;
+
+	/* Stop the CPU's kthread. */
+	t = per_cpu(rcu_cpu_kthread_task, cpu);
+	if (t != NULL) {
+		per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
+		kthread_stop(t);
+	}
 
 	/* Exclude any attempts to start a new grace period. */
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -1082,6 +1138,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
 		rcu_report_exp_rnp(rsp, rnp);
+	rcu_node_kthread_setaffinity(rnp, -1);
 }
 
 /*
@@ -1143,7 +1200,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
-		list->func(list);
+		__rcu_reclaim(list);
 		list = next;
 		if (++count >= rdp->blimit)
 			break;
@@ -1179,7 +1236,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Re-raise the RCU softirq if there are callbacks remaining. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cpu_kthread();
 }
 
 /*
@@ -1225,7 +1282,7 @@ void rcu_check_callbacks(int cpu, int user)
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cpu_kthread();
 }
 
 #ifdef CONFIG_SMP
@@ -1233,6 +1290,8 @@ void rcu_check_callbacks(int cpu, int user)
 /*
  * Scan the leaf rcu_node structures, processing dyntick state for any that
  * have not yet encountered a quiescent state, using the function specified.
+ * Also initiate boosting for any threads blocked on the root rcu_node.
+ *
  * The caller must have suppressed start of new grace periods.
  */
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
@@ -1251,7 +1310,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 			return;
 		}
 		if (rnp->qsmask == 0) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
 			continue;
 		}
 		cpu = rnp->grplo;
@@ -1269,6 +1328,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
+	rnp = rcu_get_root(rsp);
+	if (rnp->qsmask == 0) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
+	}
 }
 
 /*
@@ -1389,7 +1453,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 /*
  * Do softirq processing for the current CPU.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static void rcu_process_callbacks(void)
 {
 	/*
 	 * Memory references from any prior RCU read-side critical sections
@@ -1414,6 +1478,347 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 	rcu_needs_cpu_flush();
 }
 
+/*
+ * Wake up the current CPU's kthread.  This replaces raise_softirq()
+ * in earlier versions of RCU.  Note that because we are running on
+ * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
+ * cannot disappear out from under us.
+ */
+static void invoke_rcu_cpu_kthread(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__this_cpu_write(rcu_cpu_has_work, 1);
+	if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
+		local_irq_restore(flags);
+		return;
+	}
+	wake_up(&__get_cpu_var(rcu_cpu_wq));
+	local_irq_restore(flags);
+}
+
+/*
+ * Wake up the specified per-rcu_node-structure kthread.
+ * Because the per-rcu_node kthreads are immortal, we don't need
+ * to do anything to keep them alive.
+ */
+static void invoke_rcu_node_kthread(struct rcu_node *rnp)
+{
+	struct task_struct *t;
+
+	t = rnp->node_kthread_task;
+	if (t != NULL)
+		wake_up_process(t);
+}
+
+/*
+ * Set the specified CPU's kthread to run RT or not, as specified by
+ * the to_rt argument.  The CPU-hotplug locks are held, so the task
+ * is not going away.
+ */
+static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
+{
+	int policy;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	t = per_cpu(rcu_cpu_kthread_task, cpu);
+	if (t == NULL)
+		return;
+	if (to_rt) {
+		policy = SCHED_FIFO;
+		sp.sched_priority = RCU_KTHREAD_PRIO;
+	} else {
+		policy = SCHED_NORMAL;
+		sp.sched_priority = 0;
+	}
+	sched_setscheduler_nocheck(t, policy, &sp);
+}
+
+/*
+ * Timer handler to initiate the waking up of per-CPU kthreads that
+ * have yielded the CPU due to excess numbers of RCU callbacks.
+ * We wake up the per-rcu_node kthread, which in turn restores the RT
+ * priority of the flagged per-CPU kthreads and initiates boosting.
+ */
+static void rcu_cpu_kthread_timer(unsigned long arg)
+{
+	unsigned long flags;
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
+	struct rcu_node *rnp = rdp->mynode;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	rnp->wakemask |= rdp->grpmask;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	invoke_rcu_node_kthread(rnp);
+}
+
+/*
+ * Drop to non-real-time priority and yield, but only after posting a
+ * timer that will cause us to regain our real-time priority if we
+ * remain preempted.  Either way, we restore our real-time priority
+ * before returning.
+ */
+static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
+{
+	struct sched_param sp;
+	struct timer_list yield_timer;
+
+	setup_timer_on_stack(&yield_timer, f, arg);
+	mod_timer(&yield_timer, jiffies + 2);
+	sp.sched_priority = 0;
+	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
+	set_user_nice(current, 19);
+	schedule();
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	del_timer(&yield_timer);
+}
+
+/*
+ * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
+ * This can happen while the corresponding CPU is either coming online
+ * or going offline.  We cannot wait until the CPU is fully online
+ * before starting the kthread, because the various notifier functions
+ * can wait for RCU grace periods.  So we park rcu_cpu_kthread() until
+ * the corresponding CPU is online.
+ *
+ * Return 1 if the kthread needs to stop, 0 otherwise.
+ *
+ * Caller must disable bh.  This function can momentarily enable it.
+ */
+static int rcu_cpu_kthread_should_stop(int cpu)
+{
+	while (cpu_is_offline(cpu) ||
+	       !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
+	       smp_processor_id() != cpu) {
+		if (kthread_should_stop())
+			return 1;
+		per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+		per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
+		local_bh_enable();
+		schedule_timeout_uninterruptible(1);
+		if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
+			set_cpus_allowed_ptr(current, cpumask_of(cpu));
+		local_bh_disable();
+	}
+	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
+	return 0;
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
+ * earlier RCU softirq.
+ */
+static int rcu_cpu_kthread(void *arg)
+{
+	int cpu = (int)(long)arg;
+	unsigned long flags;
+	int spincnt = 0;
+	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
+	wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
+	char work;
+	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
+
+	for (;;) {
+		*statusp = RCU_KTHREAD_WAITING;
+		wait_event_interruptible(*wqp,
+					 *workp != 0 || kthread_should_stop());
+		local_bh_disable();
+		if (rcu_cpu_kthread_should_stop(cpu)) {
+			local_bh_enable();
+			break;
+		}
+		*statusp = RCU_KTHREAD_RUNNING;
+		per_cpu(rcu_cpu_kthread_loops, cpu)++;
+		local_irq_save(flags);
+		work = *workp;
+		*workp = 0;
+		local_irq_restore(flags);
+		if (work)
+			rcu_process_callbacks();
+		local_bh_enable();
+		if (*workp != 0)
+			spincnt++;
+		else
+			spincnt = 0;
+		if (spincnt > 10) {
+			*statusp = RCU_KTHREAD_YIELDING;
+			rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
+			spincnt = 0;
+		}
+	}
+	*statusp = RCU_KTHREAD_STOPPED;
+	return 0;
+}
+
+/*
+ * Spawn a per-CPU kthread, setting up affinity and priority.
+ * Because the CPU hotplug lock is held, no other CPU will be attempting
+ * to manipulate rcu_cpu_kthread_task.  There might be another CPU
+ * attempting to access it during boot, but the locking in kthread_bind()
+ * will enforce sufficient ordering.
+ */
+static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
+{
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (!rcu_kthreads_spawnable ||
+	    per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
+		return 0;
+	t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	kthread_bind(t, cpu);
+	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
+	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
+	per_cpu(rcu_cpu_kthread_task, cpu) = t;
+	wake_up_process(t);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	return 0;
+}
+
+/*
+ * Per-rcu_node kthread, which is in charge of waking up the per-CPU
+ * kthreads when needed.  We ignore requests to wake up kthreads
+ * for offline CPUs, which is OK because force_quiescent_state()
+ * takes care of this case.
+ */
+static int rcu_node_kthread(void *arg)
+{
+	int cpu;
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	for (;;) {
+		rnp->node_kthread_status = RCU_KTHREAD_WAITING;
+		wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+		rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		mask = rnp->wakemask;
+		rnp->wakemask = 0;
+		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
+		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
+			if ((mask & 0x1) == 0)
+				continue;
+			preempt_disable();
+			t = per_cpu(rcu_cpu_kthread_task, cpu);
+			if (!cpu_online(cpu) || t == NULL) {
+				preempt_enable();
+				continue;
+			}
+			per_cpu(rcu_cpu_has_work, cpu) = 1;
+			sp.sched_priority = RCU_KTHREAD_PRIO;
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+			preempt_enable();
+		}
+	}
+	/* NOTREACHED */
+	rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
+	return 0;
+}
+
+/*
+ * Set the per-rcu_node kthread's affinity to cover all CPUs that are
+ * served by the rcu_node in question.  The CPU hotplug lock is still
+ * held, so the value of rnp->qsmaskinit will be stable.
+ *
+ * We don't include outgoingcpu in the affinity set; pass -1 if there is
+ * no outgoing CPU.  If there are no CPUs left in the affinity set, the
+ * kthread is allowed to run on any CPU not covered by this rcu_node.
+ */
+static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+{
+	cpumask_var_t cm;
+	int cpu;
+	unsigned long mask = rnp->qsmaskinit;
+
+	if (rnp->node_kthread_task == NULL)
+		return;
+	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
+		return;
+	cpumask_clear(cm);
+	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
+		if ((mask & 0x1) && cpu != outgoingcpu)
+			cpumask_set_cpu(cpu, cm);
+	if (cpumask_weight(cm) == 0) {
+		cpumask_setall(cm);
+		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
+			cpumask_clear_cpu(cpu, cm);
+		WARN_ON_ONCE(cpumask_weight(cm) == 0);
+	}
+	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
+	rcu_boost_kthread_setaffinity(rnp, cm);
+	free_cpumask_var(cm);
+}
+
+/*
+ * Spawn a per-rcu_node kthread, setting priority and affinity.
+ * Called during boot before online/offline can happen, or, if
+ * during runtime, with the main CPU-hotplug locks held.  So only
+ * one of these can be executing at a time.
+ */
+static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
+						struct rcu_node *rnp)
+{
+	unsigned long flags;
+	int rnp_index = rnp - &rsp->node[0];
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (!rcu_kthreads_spawnable ||
+	    rnp->qsmaskinit == 0)
+		return 0;
+	if (rnp->node_kthread_task == NULL) {
+		t = kthread_create(rcu_node_kthread, (void *)rnp,
+				   "rcun%d", rnp_index);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		rnp->node_kthread_task = t;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		wake_up_process(t);
+		sp.sched_priority = 99;
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	}
+	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
+}
+
+/*
+ * Spawn all kthreads -- called as soon as the scheduler is running.
+ */
+static int __init rcu_spawn_kthreads(void)
+{
+	int cpu;
+	struct rcu_node *rnp;
+
+	rcu_kthreads_spawnable = 1;
+	for_each_possible_cpu(cpu) {
+		init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
+		per_cpu(rcu_cpu_has_work, cpu) = 0;
+		if (cpu_online(cpu))
+			(void)rcu_spawn_one_cpu_kthread(cpu);
+	}
+	rnp = rcu_get_root(rcu_state);
+	init_waitqueue_head(&rnp->node_wq);
+	rcu_init_boost_waitqueue(rnp);
+	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+	if (NUM_RCU_NODES > 1)
+		rcu_for_each_leaf_node(rcu_state, rnp) {
+			init_waitqueue_head(&rnp->node_wq);
+			rcu_init_boost_waitqueue(rnp);
+			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+		}
+	return 0;
+}
+early_initcall(rcu_spawn_kthreads);
+
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	   struct rcu_state *rsp)
@@ -1439,6 +1844,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	/* Add the callback to our list. */
 	*rdp->nxttail[RCU_NEXT_TAIL] = head;
 	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
+	rdp->qlen++;
+
+	/* If interrupts were disabled, don't dive into RCU core. */
+	if (irqs_disabled_flags(flags)) {
+		local_irq_restore(flags);
+		return;
+	}
 
 	/*
 	 * Force the grace period if too many callbacks or too long waiting.
@@ -1447,7 +1859,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	 * invoking force_quiescent_state() if the newly enqueued callback
 	 * is the only one waiting for a grace period to complete.
 	 */
-	if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
 
 		/* Are we ignoring a completed grace period? */
 		rcu_process_gp_end(rsp, rdp);
@@ -1583,7 +1995,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		 * or RCU-bh, force a local reschedule.
 		 */
 		rdp->n_rp_qs_pending++;
-		if (!rdp->preemptable &&
+		if (!rdp->preemptible &&
 		    ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
 				 jiffies))
 			set_need_resched();
@@ -1760,7 +2172,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
  * that this CPU cannot possibly have any RCU callbacks in flight yet.
  */
 static void __cpuinit
-rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 {
 	unsigned long flags;
 	unsigned long mask;
@@ -1772,7 +2184,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
 	rdp->passed_quiesc = 0;  /* We could be racing with new GP, */
 	rdp->qs_pending = 1;	 /*  so set up to respond to current GP. */
 	rdp->beenonline = 1;	 /* We have now been online. */
-	rdp->preemptable = preemptable;
+	rdp->preemptible = preemptible;
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
@@ -1813,6 +2225,19 @@ static void __cpuinit rcu_online_cpu(int cpu)
 	rcu_preempt_init_percpu_data(cpu);
 }
 
+static void __cpuinit rcu_online_kthreads(int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+
+	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
+	if (rcu_kthreads_spawnable) {
+		(void)rcu_spawn_one_cpu_kthread(cpu);
+		if (rnp->node_kthread_task == NULL)
+			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+	}
+}
+
 /*
  * Handle CPU online/offline notification events.
  */
@@ -1820,11 +2245,23 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
+		rcu_online_kthreads(cpu);
+		break;
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		rcu_node_kthread_setaffinity(rnp, -1);
+		rcu_cpu_kthread_setrt(cpu, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		rcu_node_kthread_setaffinity(rnp, cpu);
+		rcu_cpu_kthread_setrt(cpu, 0);
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
@@ -1943,10 +2380,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 					      j / rsp->levelspread[i - 1];
 			}
 			rnp->level = i;
-			INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
+			INIT_LIST_HEAD(&rnp->blkd_tasks);
 		}
 	}
 
@@ -1968,7 +2402,6 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index e8f057e44e3e..257664815d5d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -91,6 +91,14 @@ struct rcu_dynticks {
 				/*  remains even for nmi from irq handler. */
 };
 
+/* RCU's kthread states for tracing. */
+#define RCU_KTHREAD_STOPPED  0
+#define RCU_KTHREAD_RUNNING  1
+#define RCU_KTHREAD_WAITING  2
+#define RCU_KTHREAD_OFFCPU   3
+#define RCU_KTHREAD_YIELDING 4
+#define RCU_KTHREAD_MAX      4
+
 /*
  * Definition for node within the RCU grace-period-detection hierarchy.
  */
@@ -109,10 +117,11 @@ struct rcu_node {
 				/*  an rcu_data structure, otherwise, each */
 				/*  bit corresponds to a child rcu_node */
 				/*  structure. */
-	unsigned long expmask;	/* Groups that have ->blocked_tasks[] */
+	unsigned long expmask;	/* Groups that have ->blkd_tasks */
 				/*  elements that need to drain to allow the */
 				/*  current expedited grace period to */
 				/*  complete (only for TREE_PREEMPT_RCU). */
+	unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
 	unsigned long qsmaskinit;
 				/* Per-GP initial value for qsmask & expmask. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
@@ -122,11 +131,68 @@ struct rcu_node {
 	u8	grpnum;		/* CPU/group number for next level up. */
 	u8	level;		/* root is at level 0. */
 	struct rcu_node *parent;
-	struct list_head blocked_tasks[4];
-				/* Tasks blocked in RCU read-side critsect. */
-				/*  Grace period number (->gpnum) x blocked */
-				/*  by tasks on the (x & 0x1) element of the */
-				/*  blocked_tasks[] array. */
+	struct list_head blkd_tasks;
+				/* Tasks blocked in RCU read-side critical */
+				/*  section.  Tasks are placed at the head */
+				/*  of this list and age towards the tail. */
+	struct list_head *gp_tasks;
+				/* Pointer to the first task blocking the */
+				/*  current grace period, or NULL if there */
+				/*  is no such task. */
+	struct list_head *exp_tasks;
+				/* Pointer to the first task blocking the */
+				/*  current expedited grace period, or NULL */
+				/*  if there is no such task.  If there */
+				/*  is no current expedited grace period, */
+				/*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+	struct list_head *boost_tasks;
+				/* Pointer to first task that needs to be */
+				/*  priority boosted, or NULL if no priority */
+				/*  boosting is needed for this rcu_node */
+				/*  structure.  If there are no tasks */
+				/*  queued on this rcu_node structure that */
+				/*  are blocking the current grace period, */
+				/*  there can be no such task. */
+	unsigned long boost_time;
+				/* When to start boosting (jiffies). */
+	struct task_struct *boost_kthread_task;
+				/* kthread that takes care of priority */
+				/*  boosting for this rcu_node structure. */
+	wait_queue_head_t boost_wq;
+				/* Wait queue on which to park the boost */
+				/*  kthread. */
+	unsigned int boost_kthread_status;
+				/* State of boost_kthread_task for tracing. */
+	unsigned long n_tasks_boosted;
+				/* Total number of tasks boosted. */
+	unsigned long n_exp_boosts;
+				/* Number of tasks boosted for expedited GP. */
+	unsigned long n_normal_boosts;
+				/* Number of tasks boosted for normal GP. */
+	unsigned long n_balk_blkd_tasks;
+				/* Refused to boost: no blocked tasks. */
+	unsigned long n_balk_exp_gp_tasks;
+				/* Refused to boost: nothing blocking GP. */
+	unsigned long n_balk_boost_tasks;
+				/* Refused to boost: already boosting. */
+	unsigned long n_balk_notblocked;
+				/* Refused to boost: RCU RS CS still running. */
+	unsigned long n_balk_notyet;
+				/* Refused to boost: not yet time. */
+	unsigned long n_balk_nos;
+				/* Refused to boost: not sure why, though. */
+				/*  This can happen due to race conditions. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+	struct task_struct *node_kthread_task;
+				/* kthread that takes care of this rcu_node */
+				/*  structure, for example, awakening the */
+				/*  per-CPU kthreads as needed. */
+	wait_queue_head_t node_wq;
+				/* Wait queue on which to park the per-node */
+				/*  kthread. */
+	unsigned int node_kthread_status;
+				/* State of node_kthread_task for tracing. */
 } ____cacheline_internodealigned_in_smp;
 
 /*
@@ -175,7 +241,7 @@ struct rcu_data {
 	bool		passed_quiesc;	/* User-mode/idle loop etc. */
 	bool		qs_pending;	/* Core waits for quiesc state. */
 	bool		beenonline;	/* CPU online at least once. */
-	bool		preemptable;	/* Preemptable RCU? */
+	bool		preemptible;	/* Preemptible RCU? */
 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
 	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
 
@@ -254,7 +320,6 @@ struct rcu_data {
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
 #define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
 #ifdef CONFIG_PROVE_RCU
 #define RCU_STALL_DELAY_DELTA	       (5 * HZ)
@@ -272,13 +337,6 @@ struct rcu_data {
 						/*  scheduling clock irq */
 						/*  before ratting on them. */
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
-#define RCU_CPU_STALL_SUPPRESS_INIT 0
-#else
-#define RCU_CPU_STALL_SUPPRESS_INIT 1
-#endif
-
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /*
  * RCU global state, including node hierarchy.  This hierarchy is
@@ -325,12 +383,12 @@ struct rcu_state {
 						/*  due to lock unavailable. */
 	unsigned long n_force_qs_ngp;		/* Number of calls leaving */
 						/*  due to no GP active. */
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	unsigned long gp_start;			/* Time at which GP started, */
 						/*  but in jiffies. */
 	unsigned long jiffies_stall;		/* Time at which to check */
 						/*  for CPU stalls. */
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+	unsigned long gp_max;			/* Maximum GP duration in */
+						/*  jiffies. */
 	char *name;				/* Name of structure. */
 };
 
@@ -361,16 +419,14 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static void rcu_bootup_announce(void);
 long rcu_batches_completed(void);
 static void rcu_preempt_note_context_switch(int cpu);
-static int rcu_preempted_readers(struct rcu_node *rnp);
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
 				      unsigned long flags);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
 static void rcu_print_task_stall(struct rcu_node *rnp);
 static void rcu_preempt_stall_reset(void);
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
@@ -390,5 +446,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm);
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a3638710dc67..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1,7 +1,7 @@
 /*
  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  * Internal non-public definitions that provide either classic
- * or preemptable semantics.
+ * or preemptible semantics.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,10 +54,6 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
 	printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
 #endif
-#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
-	printk(KERN_INFO
-	       "\tRCU-based detection of stalled CPUs is disabled.\n");
-#endif
 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
 	printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
 #endif
@@ -70,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
+static struct rcu_state *rcu_state = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
@@ -78,7 +75,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  */
 static void __init rcu_bootup_announce(void)
 {
-	printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
+	printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
 	rcu_bootup_announce_oddness();
 }
 
@@ -111,7 +108,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
- * Record a preemptable-RCU quiescent state for the specified CPU.  Note
+ * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
@@ -134,12 +131,12 @@ static void rcu_preempt_qs(int cpu)
  * We have entered the scheduler, and the current task might soon be
  * context-switched away from.  If this task is in an RCU read-side
  * critical section, we will no longer be able to rely on the CPU to
- * record that fact, so we enqueue the task on the appropriate entry
- * of the blocked_tasks[] array.  The task will dequeue itself when
- * it exits the outermost enclosing RCU read-side critical section.
- * Therefore, the current grace period cannot be permitted to complete
- * until the blocked_tasks[] entry indexed by the low-order bit of
- * rnp->gpnum empties.
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section.  Therefore, the current grace period
+ * cannot be permitted to complete until the blkd_tasks list entries
+ * predating the current grace period drain, in other words, until
+ * rnp->gp_tasks becomes NULL.
  *
  * Caller must disable preemption.
  */
@@ -147,7 +144,6 @@ static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
-	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
@@ -169,15 +165,30 @@ static void rcu_preempt_note_context_switch(int cpu)
 		 * (i.e., this CPU has not yet passed through a quiescent
 		 * state for the current grace period), then as long
 		 * as that task remains queued, the current grace period
-		 * cannot end.
+		 * cannot end.  Note that there is some uncertainty as
+		 * to exactly when the current grace period started.
+		 * We take a conservative approach, which can result
+		 * in unnecessarily waiting on tasks that started very
+		 * slightly after the current grace period began.  C'est
+		 * la vie!!!
 		 *
 		 * But first, note that the current CPU must still be
 		 * on line!
 		 */
 		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
-		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
+			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
+			rnp->gp_tasks = &t->rcu_node_entry;
+#ifdef CONFIG_RCU_BOOST
+			if (rnp->boost_tasks != NULL)
+				rnp->boost_tasks = rnp->gp_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+		} else {
+			list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+			if (rnp->qsmask & rdp->grpmask)
+				rnp->gp_tasks = &t->rcu_node_entry;
+		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
@@ -196,7 +207,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 }
 
 /*
- * Tree-preemptable RCU implementation for rcu_read_lock().
+ * Tree-preemptible RCU implementation for rcu_read_lock().
  * Just increment ->rcu_read_lock_nesting, shared state will be updated
  * if we block.
  */
@@ -212,12 +223,9 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  * for the specified rcu_node structure.  If the caller needs a reliable
  * answer, it must hold the rcu_node's ->lock.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
-	int phase = rnp->gpnum & 0x1;
-
-	return !list_empty(&rnp->blocked_tasks[phase]) ||
-	       !list_empty(&rnp->blocked_tasks[phase + 2]);
+	return rnp->gp_tasks != NULL;
 }
 
 /*
@@ -233,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
-	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;  /* Still need more quiescent states! */
 	}
@@ -257,6 +265,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 }
 
 /*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t,
+					     struct rcu_node *rnp)
+{
+	struct list_head *np;
+
+	np = t->rcu_node_entry.next;
+	if (np == &rnp->blkd_tasks)
+		np = NULL;
+	return np;
+}
+
+/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
@@ -266,6 +289,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	int empty;
 	int empty_exp;
 	unsigned long flags;
+	struct list_head *np;
 	struct rcu_node *rnp;
 	int special;
 
@@ -306,10 +330,19 @@ static void rcu_read_unlock_special(struct task_struct *t)
 				break;
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		}
-		empty = !rcu_preempted_readers(rnp);
+		empty = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
+		np = rcu_next_node_entry(t, rnp);
 		list_del_init(&t->rcu_node_entry);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp->gp_tasks = np;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp->exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rnp->boost_tasks)
+			rnp->boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
 		/*
@@ -322,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		else
 			rcu_report_unblock_qs_rnp(rnp, flags);
 
+#ifdef CONFIG_RCU_BOOST
+		/* Unboost if we were boosted. */
+		if (special & RCU_READ_UNLOCK_BOOSTED) {
+			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+			rt_mutex_unlock(t->rcu_boost_mutex);
+			t->rcu_boost_mutex = NULL;
+		}
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
 		/*
 		 * If this was the last task on the expedited lists,
 		 * then we need to report up the rcu_node hierarchy.
@@ -334,7 +376,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 }
 
 /*
- * Tree-preemptable RCU implementation for rcu_read_unlock().
+ * Tree-preemptible RCU implementation for rcu_read_unlock().
  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
  * invoke rcu_read_unlock_special() to clean up after a context switch
@@ -356,8 +398,6 @@ void __rcu_read_unlock(void)
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 
 /*
@@ -367,18 +407,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 {
 	unsigned long flags;
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			sched_show_task(t);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
+	raw_spin_lock_irqsave(&rnp->lock, flags);  /* ->gp_tasks stable below. */
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
+		goto out_unlock;  /* No reader blocks the current GP. */
+	t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		sched_show_task(t);  /* Iterator advances first: ->prev above. */
+out_unlock:
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -408,16 +446,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			printk(" P%d", t->pid);
-	}
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
+		return;
+	/* Back up one entry: the _continue() iterator advances before use. */
+	t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		printk(" P%d", t->pid);
 }
 
 /*
@@ -430,18 +466,21 @@ static void rcu_preempt_stall_reset(void)
 	rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
 /*
  * Check that the list of blocked tasks for the newly completed grace
  * period is in fact empty.  It is a serious bug to complete a grace
  * period that still has RCU readers blocked!  This function must be
  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  * must be held by the caller.
+ *
+ * Also, if there are blocked tasks on the list, they automatically
+ * block the newly created grace period, so set up ->gp_tasks accordingly.
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
-	WARN_ON_ONCE(rcu_preempted_readers(rnp));
+	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
+	if (!list_empty(&rnp->blkd_tasks))
+		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
@@ -465,50 +504,68 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 				     struct rcu_node *rnp,
 				     struct rcu_data *rdp)
 {
-	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
 	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
-	struct task_struct *tp;
+	struct task_struct *t;
 
 	if (rnp == rnp_root) {
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
 		return 0;  /* Shouldn't happen: at least one CPU online. */
 	}
-	WARN_ON_ONCE(rnp != rdp->mynode &&
-		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1]) ||
-		      !list_empty(&rnp->blocked_tasks[2]) ||
-		      !list_empty(&rnp->blocked_tasks[3])));
+
+	/* If we are on an internal node, complain bitterly. */
+	WARN_ON_ONCE(rnp != rdp->mynode);
 
 	/*
-	 * Move tasks up to root rcu_node.  Rely on the fact that the
-	 * root rcu_node can be at most one ahead of the rest of the
-	 * rcu_nodes in terms of gp_num value.  This fact allows us to
-	 * move the blocked_tasks[] array directly, element by element.
+	 * Move tasks up to root rcu_node.  Don't try to get fancy for
+	 * this corner-case operation -- just put this node's tasks
+	 * at the head of the root node's list, and update the root node's
+	 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
+	 * if non-NULL.  This might result in waiting for more tasks than
+	 * absolutely necessary, but this is a good performance/complexity
+	 * tradeoff.
 	 */
-	if (rcu_preempted_readers(rnp))
+	if (rcu_preempt_blocked_readers_cgp(rnp))
 		retval |= RCU_OFL_TASKS_NORM_GP;
 	if (rcu_preempted_readers_exp(rnp))
 		retval |= RCU_OFL_TASKS_EXP_GP;
-	for (i = 0; i < 4; i++) {
-		lp = &rnp->blocked_tasks[i];
-		lp_root = &rnp_root->blocked_tasks[i];
-		while (!list_empty(lp)) {
-			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-			list_del(&tp->rcu_node_entry);
-			tp->rcu_blocked_node = rnp_root;
-			list_add(&tp->rcu_node_entry, lp_root);
-			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
-		}
+	lp = &rnp->blkd_tasks;
+	lp_root = &rnp_root->blkd_tasks;
+	while (!list_empty(lp)) {
+		t = list_entry(lp->next, typeof(*t), rcu_node_entry);
+		raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+		list_del(&t->rcu_node_entry);
+		t->rcu_blocked_node = rnp_root;
+		list_add(&t->rcu_node_entry, lp_root);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp_root->gp_tasks = rnp->gp_tasks;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp_root->exp_tasks = rnp->exp_tasks;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rnp->boost_tasks)
+			rnp_root->boost_tasks = rnp->boost_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
+
+#ifdef CONFIG_RCU_BOOST
+	/* In case root is being boosted and leaf is not. */
+	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+	if (rnp_root->boost_tasks != NULL &&
+	    rnp_root->boost_tasks != rnp_root->gp_tasks)
+		rnp_root->boost_tasks = rnp_root->gp_tasks;
+	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 	return retval;
 }
 
 /*
- * Do CPU-offline processing for preemptable RCU.
+ * Do CPU-offline processing for preemptible RCU.
  */
 static void rcu_preempt_offline_cpu(int cpu)
 {
@@ -537,7 +594,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 }
 
 /*
- * Process callbacks for preemptable RCU.
+ * Process callbacks for preemptible RCU.
  */
 static void rcu_preempt_process_callbacks(void)
 {
@@ -546,7 +603,7 @@ static void rcu_preempt_process_callbacks(void)
 }
 
 /*
- * Queue a preemptable-RCU callback for invocation after a grace period.
+ * Queue a preemptible-RCU callback for invocation after a grace period.
  */
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
@@ -594,8 +651,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  */
 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[2]) ||
-	       !list_empty(&rnp->blocked_tasks[3]);
+	return rnp->exp_tasks != NULL;
 }
 
 /*
@@ -655,13 +711,17 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int must_wait;
+	unsigned long flags;
+	int must_wait = 0;
 
-	raw_spin_lock(&rnp->lock); /* irqs already disabled */
-	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
-	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
-	must_wait = rcu_preempted_readers_exp(rnp);
-	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (list_empty(&rnp->blkd_tasks))
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	else {
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
+		must_wait = 1;
+	}
 	if (!must_wait)
 		rcu_report_exp_rnp(rsp, rnp);
 }
@@ -669,9 +729,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 /*
  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
  * is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blocked_tasks[] lists, move all entries from the first set of
- * ->blocked_tasks[] lists to the second set, and finally wait for this
- * second set to drain.
+ * the ->blkd_tasks lists and wait for this list to drain.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -703,7 +761,7 @@ void synchronize_rcu_expedited(void)
 	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 		goto unlock_mb_ret; /* Others did our work for us. */
 
-	/* force all RCU readers onto blocked_tasks[]. */
+	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -715,7 +773,7 @@ void synchronize_rcu_expedited(void)
 		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
 
-	/* Snapshot current state of ->blocked_tasks[] lists. */
+	/* Snapshot current state of ->blkd_tasks lists. */
 	rcu_for_each_leaf_node(rsp, rnp)
 		sync_rcu_preempt_exp_init(rsp, rnp);
 	if (NUM_RCU_NODES > 1)
@@ -723,7 +781,7 @@ void synchronize_rcu_expedited(void)
 
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
 	wait_event(sync_rcu_preempt_exp_wq,
 		   sync_rcu_preempt_exp_done(rnp));
@@ -739,7 +797,7 @@ mb_ret:
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 /*
- * Check to see if there is any immediate preemptable-RCU-related work
+ * Check to see if there is any immediate preemptible-RCU-related work
  * to be done.
  */
 static int rcu_preempt_pending(int cpu)
@@ -749,7 +807,7 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Does preemptable RCU need the CPU to stay out of dynticks mode?
+ * Does preemptible RCU need the CPU to stay out of dynticks mode?
  */
 static int rcu_preempt_needs_cpu(int cpu)
 {
@@ -766,7 +824,7 @@ void rcu_barrier(void)
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /*
- * Initialize preemptable RCU's per-CPU data.
+ * Initialize preemptible RCU's per-CPU data.
  */
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 {
@@ -774,7 +832,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptable RCU's callbacks from dying CPU to other online CPU.
+ * Move preemptible RCU's callbacks from dying CPU to other online CPU.
  */
 static void rcu_preempt_send_cbs_to_online(void)
 {
@@ -782,7 +840,7 @@ static void rcu_preempt_send_cbs_to_online(void)
 }
 
 /*
- * Initialize preemptable RCU's state structures.
+ * Initialize preemptible RCU's state structures.
  */
 static void __init __rcu_init_preempt(void)
 {
@@ -790,7 +848,7 @@ static void __init __rcu_init_preempt(void)
 }
 
 /*
- * Check for a task exiting while in a preemptable-RCU read-side
+ * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
  * as debug_check_no_locks_held() already does this if lockdep
  * is enabled.
@@ -802,11 +860,13 @@ void exit_rcu(void)
 	if (t->rcu_read_lock_nesting == 0)
 		return;
 	t->rcu_read_lock_nesting = 1;
-	rcu_read_unlock();
+	__rcu_read_unlock();
 }
 
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+static struct rcu_state *rcu_state = &rcu_sched_state;
+
 /*
  * Tell them what RCU they are running.
  */
@@ -836,7 +896,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
 static void rcu_preempt_note_context_switch(int cpu)
@@ -844,10 +904,10 @@ static void rcu_preempt_note_context_switch(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, there are never any preempted
+ * Because preemptible RCU does not exist, there are never any preempted
  * RCU readers.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
 	return 0;
 }
@@ -862,10 +922,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
  */
 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
@@ -873,7 +931,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 }
 
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
@@ -888,10 +946,8 @@ static void rcu_preempt_stall_reset(void)
 {
 }
 
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
 /*
- * Because there is no preemptable RCU, there can be no readers blocked,
+ * Because there is no preemptible RCU, there can be no readers blocked,
  * so there is no need to check for blocked tasks.  So check only for
  * bogus qsmask values.
  */
@@ -903,7 +959,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Because preemptable RCU does not exist, it never needs to migrate
+ * Because preemptible RCU does not exist, it never needs to migrate
  * tasks that were blocked within RCU read-side critical sections, and
  * such non-existent tasks cannot possibly have been blocking the current
  * grace period.
@@ -916,7 +972,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 }
 
 /*
- * Because preemptable RCU does not exist, it never needs CPU-offline
+ * Because preemptible RCU does not exist, it never needs CPU-offline
  * processing.
  */
 static void rcu_preempt_offline_cpu(int cpu)
@@ -926,7 +982,7 @@ static void rcu_preempt_offline_cpu(int cpu)
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
- * Because preemptable RCU does not exist, it never has any callbacks
+ * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
  */
 static void rcu_preempt_check_callbacks(int cpu)
@@ -934,7 +990,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, it never has any callbacks
+ * Because preemptible RCU does not exist, it never has any callbacks
  * to process.
  */
 static void rcu_preempt_process_callbacks(void)
@@ -943,7 +999,7 @@ static void rcu_preempt_process_callbacks(void)
 
 /*
  * Wait for an rcu-preempt grace period, but make it happen quickly.
- * But because preemptable RCU does not exist, map to rcu-sched.
+ * But because preemptible RCU does not exist, map to rcu-sched.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -954,7 +1010,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Because preemptable RCU does not exist, there is never any need to
+ * Because preemptible RCU does not exist, there is never any need to
  * report on tasks preempted in RCU read-side critical sections during
  * expedited RCU grace periods.
  */
@@ -966,7 +1022,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
- * Because preemptable RCU does not exist, it never has any work to do.
+ * Because preemptible RCU does not exist, it never has any work to do.
  */
 static int rcu_preempt_pending(int cpu)
 {
@@ -974,7 +1030,7 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, it never needs any CPU.
+ * Because preemptible RCU does not exist, it never needs any CPU.
  */
 static int rcu_preempt_needs_cpu(int cpu)
 {
@@ -982,7 +1038,7 @@ static int rcu_preempt_needs_cpu(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, rcu_barrier() is just
+ * Because preemptible RCU does not exist, rcu_barrier() is just
  * another name for rcu_barrier_sched().
  */
 void rcu_barrier(void)
@@ -992,7 +1048,7 @@ void rcu_barrier(void)
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /*
- * Because preemptable RCU does not exist, there is no per-CPU
+ * Because preemptible RCU does not exist, there is no per-CPU
  * data to initialize.
  */
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
@@ -1000,14 +1056,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Because there is no preemptable RCU, there are no callbacks to move.
+ * Because there is no preemptible RCU, there are no callbacks to move.
  */
 static void rcu_preempt_send_cbs_to_online(void)
 {
 }
 
 /*
- * Because preemptable RCU does not exist, it need not be initialized.
+ * Because preemptible RCU does not exist, it need not be initialized.
  */
 static void __init __rcu_init_preempt(void)
 {
@@ -1015,6 +1071,276 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+#ifdef CONFIG_RCU_TRACE
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+	if (list_empty(&rnp->blkd_tasks))
+		rnp->n_balk_blkd_tasks++;
+	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
+		rnp->n_balk_exp_gp_tasks++;
+	else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
+		rnp->n_balk_boost_tasks++;
+	else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
+		rnp->n_balk_notblocked++;
+	else if (rnp->gp_tasks != NULL &&
+		 ULONG_CMP_LT(jiffies, rnp->boost_time))
+		rnp->n_balk_notyet++;
+	else
+		rnp->n_balk_nos++;
+}
+
+#else /* #ifdef CONFIG_RCU_TRACE */
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->exp_tasks
+ * or ->boost_tasks, advancing the pointer to the next task in the
+ * ->blkd_tasks list.
+ *
+ * Note that irqs must be enabled: boosting the task can block.
+ * Returns 1 if there are more tasks needing to be boosted.
+ */
+static int rcu_boost(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct rt_mutex mtx;
+	struct task_struct *t;
+	struct list_head *tb;
+
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+		return 0;  /* Nothing left to boost. */
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+
+	/*
+	 * Recheck under the lock: all tasks in need of boosting
+	 * might exit their RCU read-side critical sections on their own.
+	 */
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * Preferentially boost tasks blocking expedited grace periods.
+	 * This cannot starve the normal grace periods because a second
+	 * expedited grace period must boost all blocked tasks, including
+	 * those blocking the pre-existing normal grace period.
+	 */
+	if (rnp->exp_tasks != NULL) {
+		tb = rnp->exp_tasks;
+		rnp->n_exp_boosts++;
+	} else {
+		tb = rnp->boost_tasks;
+		rnp->n_normal_boosts++;
+	}
+	rnp->n_tasks_boosted++;
+
+	/*
+	 * We boost task t by manufacturing an rt_mutex that appears to
+	 * be held by task t.  We leave a pointer to that rt_mutex where
+	 * task t can find it, and task t will release the mutex when it
+	 * exits its outermost RCU read-side critical section.  Then
+	 * simply acquiring this artificial rt_mutex will boost task
+	 * t's priority.  (Thanks to tglx for suggesting this approach!)
+	 *
+	 * Note that task t must acquire rnp->lock to remove itself from
+	 * the ->blkd_tasks list, which it will do from exit() if from
+	 * nowhere else.  We therefore are guaranteed that task t will
+	 * stay around at least until we drop rnp->lock.  Note that
+	 * rnp->lock also resolves races between our priority boosting
+	 * and task t's exiting its outermost RCU read-side critical
+	 * section.
+	 */
+	t = container_of(tb, struct task_struct, rcu_node_entry);
+	rt_mutex_init_proxy_locked(&mtx, t);
+	t->rcu_boost_mutex = &mtx;
+	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
+	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
+
+	return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
+}
+
+/*
+ * Timer handler to initiate waking up of boost kthreads that
+ * have yielded the CPU due to excessive numbers of tasks to
+ * boost.  We wake up the per-rcu_node kthread, which in turn
+ * will wake up the booster kthread.
+ */
+static void rcu_boost_kthread_timer(unsigned long arg)
+{
+	invoke_rcu_node_kthread((struct rcu_node *)arg);
+}
+
+/*
+ * Priority-boosting kthread.  One per leaf rcu_node and one for the
+ * root rcu_node.
+ */
+static int rcu_boost_kthread(void *arg)
+{
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+	int spincnt = 0;
+	int more2boost;
+
+	for (;;) {
+		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
+		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
+							rnp->exp_tasks);
+		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
+		more2boost = rcu_boost(rnp);
+		if (more2boost)
+			spincnt++;
+		else
+			spincnt = 0;
+		if (spincnt > 10) {
+			rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
+			spincnt = 0;
+		}
+	}
+	/* NOTREACHED */
+	return 0;
+}
+
+/*
+ * Check to see if it is time to start boosting RCU readers that are
+ * blocking the current grace period, and, if so, tell the per-rcu_node
+ * kthread to start boosting them.  If there is an expedited grace
+ * period in progress, it is always time to boost.
+ *
+ * The caller must hold rnp->lock, which this function releases,
+ * restoring irqs from "flags".  The ->boost_kthread_task is immortal,
+ * so we don't need to worry about it going away.
+ */
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+	struct task_struct *t;
+
+	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+		rnp->n_balk_exp_gp_tasks++;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	if (rnp->exp_tasks != NULL ||
+	    (rnp->gp_tasks != NULL &&
+	     rnp->boost_tasks == NULL &&
+	     rnp->qsmask == 0 &&
+	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+		if (rnp->exp_tasks == NULL)
+			rnp->boost_tasks = rnp->gp_tasks;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		t = rnp->boost_kthread_task;
+		if (t != NULL)
+			wake_up_process(t);
+	} else {
+		rcu_initiate_boost_trace(rnp);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
+ * Set the affinity of the boost kthread.  The CPU-hotplug locks are
+ * held, so no one should be messing with the existence of the boost
+ * kthread.
+ */
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+	struct task_struct *t;
+
+	t = rnp->boost_kthread_task;  /* Snapshot once; NULL-checked below. */
+	if (t != NULL)
+		set_cpus_allowed_ptr(t, cm);  /* Use the checked snapshot. */
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+}
+
+/*
+ * Initialize the RCU-boost waitqueue.
+ */
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+	init_waitqueue_head(&rnp->boost_wq);
+}
+
+/*
+ * Create an RCU-boost kthread for the specified node if one does not
+ * already exist.  We only create this kthread for preemptible RCU.
+ * Returns zero if all is well, a negated errno otherwise.
+ */
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	unsigned long flags;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (&rcu_preempt_state != rsp)
+		return 0;  /* Only the rcu_preempt flavor gets a boost kthread. */
+	if (rnp->boost_kthread_task != NULL)
+		return 0;  /* Already spawned for this rcu_node. */
+	t = kthread_create(rcu_boost_kthread, (void *)rnp,
+			   "rcub%d", rnp_index);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	rnp->boost_kthread_task = t;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	wake_up_process(t);  /* Wake only after SCHED_FIFO has been applied. */
+	return 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+}
+
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+}
+
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+}
+
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	return 0;
+}
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
 #ifndef CONFIG_SMP
 
 void synchronize_sched_expedited(void)
@@ -1187,8 +1513,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
  *
  * Because it is not legal to invoke rcu_process_callbacks() with irqs
  * disabled, we do one pass of force_quiescent_state(), then do a
- * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
- * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
+ * later.  The per-cpu rcu_dyntick_drain variable controls the sequencing.
  */
 int rcu_needs_cpu(int cpu)
 {
@@ -1239,7 +1565,7 @@ int rcu_needs_cpu(int cpu)
 
 	/* If RCU callbacks are still pending, RCU still needs this CPU. */
 	if (c)
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cpu_kthread();
 	return c;
 }
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index c8e97853b970..aa0fd72b4bc7 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,18 @@
 #define RCU_TREE_NONCORE
 #include "rcutree.h"
 
+DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
+DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
+DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
+DECLARE_PER_CPU(char, rcu_cpu_has_work);
+
+static char convert_kthread_status(unsigned int kthread_status)
+{
+	if (kthread_status > RCU_KTHREAD_MAX)
+		return '?';
+	return "SRWOY"[kthread_status];
+}
+
 static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 {
 	if (!rdp->beenonline)
@@ -64,7 +76,21 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
-	seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit);
+	seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld",
+		   rdp->qlen,
+		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
+			rdp->nxttail[RCU_NEXT_TAIL]],
+		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
+			rdp->nxttail[RCU_NEXT_READY_TAIL]],
+		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
+			rdp->nxttail[RCU_WAIT_TAIL]],
+		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
+		   per_cpu(rcu_cpu_has_work, rdp->cpu),
+		   convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
+					  rdp->cpu)),
+		   per_cpu(rcu_cpu_kthread_cpu, rdp->cpu),
+		   per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff,
+		   rdp->blimit);
 	seq_printf(m, " ci=%lu co=%lu ca=%lu\n",
 		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
 }
@@ -121,7 +147,18 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
-	seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit);
+	seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen,
+		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
+			rdp->nxttail[RCU_NEXT_TAIL]],
+		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
+			rdp->nxttail[RCU_NEXT_READY_TAIL]],
+		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
+			rdp->nxttail[RCU_WAIT_TAIL]],
+		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
+		   per_cpu(rcu_cpu_has_work, rdp->cpu),
+		   convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
+					  rdp->cpu)),
+		   rdp->blimit);
 	seq_printf(m, ",%lu,%lu,%lu\n",
 		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
 }
@@ -157,11 +194,76 @@ static const struct file_operations rcudata_csv_fops = {
 	.release = single_release,
 };
 
+#ifdef CONFIG_RCU_BOOST
+
+static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
+{
+	seq_printf(m,  "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu "
+		   "j=%04x bt=%04x\n",
+		   rnp->grplo, rnp->grphi,
+		   "T."[list_empty(&rnp->blkd_tasks)],
+		   "N."[!rnp->gp_tasks],
+		   "E."[!rnp->exp_tasks],
+		   "B."[!rnp->boost_tasks],
+		   convert_kthread_status(rnp->boost_kthread_status),
+		   rnp->n_tasks_boosted, rnp->n_exp_boosts,
+		   rnp->n_normal_boosts,
+		   (int)(jiffies & 0xffff),
+		   (int)(rnp->boost_time & 0xffff));
+	seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
+		   "     balk",
+		   rnp->n_balk_blkd_tasks,
+		   rnp->n_balk_exp_gp_tasks,
+		   rnp->n_balk_boost_tasks,
+		   rnp->n_balk_notblocked,
+		   rnp->n_balk_notyet,
+		   rnp->n_balk_nos);
+}
+
+static int show_rcu_node_boost(struct seq_file *m, void *unused)
+{
+	struct rcu_node *rnp;
+
+	rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
+		print_one_rcu_node_boost(m, rnp);
+	return 0;
+}
+
+static int rcu_node_boost_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcu_node_boost, NULL);
+}
+
+static const struct file_operations rcu_node_boost_fops = {
+	.owner = THIS_MODULE,
+	.open = rcu_node_boost_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Create the rcuboost debugfs entry.  Returns nonzero on failure, else zero.
+ */
+static int rcu_boost_trace_create_file(struct dentry *rcudir)
+{
+	return !debugfs_create_file("rcuboost", 0444, rcudir, NULL,
+				    &rcu_node_boost_fops);
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+static int rcu_boost_trace_create_file(struct dentry *rcudir)
+{
+	return 0;  /* There cannot be an error if we didn't create it! */
+}
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
 static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 {
 	unsigned long gpnum;
 	int level = 0;
-	int phase;
 	struct rcu_node *rnp;
 
 	gpnum = rsp->gpnum;
@@ -178,13 +280,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 			seq_puts(m, "\n");
 			level = rnp->level;
 		}
-		phase = gpnum & 0x1;
-		seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d    ",
+		seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d    ",
 			   rnp->qsmask, rnp->qsmaskinit,
-			   "T."[list_empty(&rnp->blocked_tasks[phase])],
-			   "E."[list_empty(&rnp->blocked_tasks[phase + 2])],
-			   "T."[list_empty(&rnp->blocked_tasks[!phase])],
-			   "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
+			   ".G"[rnp->gp_tasks != NULL],
+			   ".E"[rnp->exp_tasks != NULL],
+			   ".T"[!list_empty(&rnp->blkd_tasks)],
 			   rnp->grplo, rnp->grphi, rnp->grpnum);
 	}
 	seq_puts(m, "\n");
@@ -216,16 +316,35 @@ static const struct file_operations rcuhier_fops = {
 	.release = single_release,
 };
 
+static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	unsigned long completed;
+	unsigned long gpnum;
+	unsigned long gpage;
+	unsigned long gpmax;
+	struct rcu_node *rnp = &rsp->node[0];
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	completed = rsp->completed;
+	gpnum = rsp->gpnum;
+	if (rsp->completed == rsp->gpnum)
+		gpage = 0;
+	else
+		gpage = jiffies - rsp->gp_start;
+	gpmax = rsp->gp_max;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	seq_printf(m, "%s: completed=%ld  gpnum=%lu  age=%ld  max=%ld\n",
+		   rsp->name, completed, gpnum, gpage, gpmax);
+}
+
 static int show_rcugp(struct seq_file *m, void *unused)
 {
 #ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_printf(m, "rcu_preempt: completed=%ld  gpnum=%lu\n",
-		   rcu_preempt_state.completed, rcu_preempt_state.gpnum);
+	show_one_rcugp(m, &rcu_preempt_state);
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_printf(m, "rcu_sched: completed=%ld  gpnum=%lu\n",
-		   rcu_sched_state.completed, rcu_sched_state.gpnum);
-	seq_printf(m, "rcu_bh: completed=%ld  gpnum=%lu\n",
-		   rcu_bh_state.completed, rcu_bh_state.gpnum);
+	show_one_rcugp(m, &rcu_sched_state);
+	show_one_rcugp(m, &rcu_bh_state);
 	return 0;
 }
 
@@ -298,6 +417,29 @@ static const struct file_operations rcu_pending_fops = {
 	.release = single_release,
 };
 
+static int show_rcutorture(struct seq_file *m, void *unused)
+{
+	seq_printf(m, "rcutorture test sequence: %lu %s\n",
+		   rcutorture_testseq >> 1,
+		   (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
+	seq_printf(m, "rcutorture update version number: %lu\n",
+		   rcutorture_vernum);
+	return 0;
+}
+
+static int rcutorture_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcutorture, NULL);
+}
+
+static const struct file_operations rcutorture_fops = {
+	.owner = THIS_MODULE,
+	.open = rcutorture_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
 static struct dentry *rcudir;
 
 static int __init rcutree_trace_init(void)
@@ -318,6 +460,9 @@ static int __init rcutree_trace_init(void)
 	if (!retval)
 		goto free_out;
 
+	if (rcu_boost_trace_create_file(rcudir))
+		goto free_out;
+
 	retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
 	if (!retval)
 		goto free_out;
@@ -331,6 +476,11 @@ static int __init rcutree_trace_init(void)
 						NULL, &rcu_pending_fops);
 	if (!retval)
 		goto free_out;
+
+	retval = debugfs_create_file("rcutorture", 0444, rcudir,
+						NULL, &rcutorture_fops);
+	if (!retval)
+		goto free_out;
 	return 0;
 free_out:
 	debugfs_remove_recursive(rcudir);
diff --git a/kernel/sched.c b/kernel/sched.c
index 48013633d792..c62acf45d3b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,7 +231,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 #endif
 
 /*
- * sched_domains_mutex serializes calls to arch_init_sched_domains,
+ * sched_domains_mutex serializes calls to init_sched_domains,
  * detach_destroy_domains and partition_sched_domains.
  */
 static DEFINE_MUTEX(sched_domains_mutex);
@@ -312,6 +312,9 @@ struct cfs_rq {
 
 	u64 exec_clock;
 	u64 min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+#endif
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -325,7 +328,9 @@ struct cfs_rq {
 	 */
 	struct sched_entity *curr, *next, *last, *skip;
 
+#ifdef	CONFIG_SCHED_DEBUG
 	unsigned int nr_spread_over;
+#endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
@@ -417,6 +422,7 @@ struct rt_rq {
  */
 struct root_domain {
 	atomic_t refcount;
+	struct rcu_head rcu;
 	cpumask_var_t span;
 	cpumask_var_t online;
 
@@ -460,7 +466,7 @@ struct rq {
 	u64 nohz_stamp;
 	unsigned char nohz_balance_kick;
 #endif
-	unsigned int skip_clock_update;
+	int skip_clock_update;
 
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
@@ -553,6 +559,10 @@ struct rq {
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
 #endif
+
+#ifdef CONFIG_SMP
+	struct task_struct *wake_list;
+#endif
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -571,7 +581,7 @@ static inline int cpu_of(struct rq *rq)
 
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
-			      rcu_read_lock_sched_held() || \
+			      rcu_read_lock_held() || \
 			      lockdep_is_held(&sched_domains_mutex))
 
 /*
@@ -596,7 +606,7 @@ static inline int cpu_of(struct rq *rq)
  * Return the group to which this tasks belongs.
  *
  * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
  * holds that lock for each task it moves into the cgroup. Therefore
  * by holding that lock, we pin the task to the current cgroup.
  */
@@ -606,7 +616,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&task_rq(p)->lock));
+			lockdep_is_held(&p->pi_lock));
 	tg = container_of(css, struct task_group, css);
 
 	return autogroup_task_group(p, tg);
@@ -642,7 +652,7 @@ static void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	if (rq->skip_clock_update)
+	if (rq->skip_clock_update > 0)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -838,18 +848,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
 	return rq->curr == p;
 }
 
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(struct rq *rq, struct task_struct *p)
 {
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
 	return task_current(rq, p);
+#endif
 }
 
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
 }
 
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
@@ -865,15 +896,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
-	return p->oncpu;
-#else
-	return task_current(rq, p);
-#endif
-}
-
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 #ifdef CONFIG_SMP
@@ -882,7 +904,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 	 * SMP rebalancing from interrupt is the only thing that cares
 	 * here.
 	 */
-	next->oncpu = 1;
+	next->on_cpu = 1;
 #endif
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	raw_spin_unlock_irq(&rq->lock);
@@ -895,12 +917,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
 	/*
-	 * After ->oncpu is cleared, the task can be moved to a different CPU.
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
 	 * We must ensure this doesn't happen until the switch is completely
 	 * finished.
 	 */
 	smp_wmb();
-	prev->oncpu = 0;
+	prev->on_cpu = 0;
 #endif
 #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_enable();
@@ -909,23 +931,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
- * against ttwu().
- */
-static inline int task_is_waking(struct task_struct *p)
-{
-	return unlikely(p->state == TASK_WAKING);
-}
-
-/*
- * __task_rq_lock - lock the runqueue a given task resides on.
- * Must be called interrupts disabled.
+ * __task_rq_lock - lock the rq @p resides on.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
+	lockdep_assert_held(&p->pi_lock);
+
 	for (;;) {
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
@@ -936,22 +950,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 }
 
 /*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts. Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  */
 static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(p->pi_lock)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
 	for (;;) {
-		local_irq_save(*flags);
+		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
 		if (likely(rq == task_rq(p)))
 			return rq;
-		raw_spin_unlock_irqrestore(&rq->lock, *flags);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 	}
 }
 
@@ -961,10 +975,13 @@ static void __task_rq_unlock(struct rq *rq)
 	raw_spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
 	__releases(rq->lock)
+	__releases(p->pi_lock)
 {
-	raw_spin_unlock_irqrestore(&rq->lock, *flags);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 /*
@@ -1193,11 +1210,17 @@ int get_nohz_timer_target(void)
 	int i;
 	struct sched_domain *sd;
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
-		for_each_cpu(i, sched_domain_span(sd))
-			if (!idle_cpu(i))
-				return i;
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (!idle_cpu(i)) {
+				cpu = i;
+				goto unlock;
+			}
+		}
 	}
+unlock:
+	rcu_read_unlock();
 	return cpu;
 }
 /*
@@ -1307,15 +1330,15 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 {
 	u64 tmp;
 
+	tmp = (u64)delta_exec * weight;
+
 	if (!lw->inv_weight) {
 		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
 			lw->inv_weight = 1;
 		else
-			lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
-				/ (lw->weight+1);
+			lw->inv_weight = WMULT_CONST / lw->weight;
 	}
 
-	tmp = (u64)delta_exec * weight;
 	/*
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
@@ -1773,7 +1796,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	update_rq_clock(rq);
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, flags);
-	p->se.on_rq = 1;
 }
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1781,7 +1803,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	update_rq_clock(rq);
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, flags);
-	p->se.on_rq = 0;
 }
 
 /*
@@ -2116,7 +2137,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
+	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -2162,6 +2183,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 */
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
 			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+				      lockdep_is_held(&task_rq(p)->lock)));
+#endif
 #endif
 
 	trace_sched_migrate_task(p, new_cpu);
@@ -2182,19 +2208,6 @@ struct migration_arg {
 static int migration_cpu_stop(void *data);
 
 /*
- * The task's runqueue lock must be held.
- * Returns true if you have to wait for migration thread.
- */
-static bool migrate_task(struct task_struct *p, struct rq *rq)
-{
-	/*
-	 * If the task is not on a runqueue (and not running), then
-	 * the next wake-up will properly place the task.
-	 */
-	return p->se.on_rq || task_running(rq, p);
-}
-
-/*
  * wait_task_inactive - wait for a thread to unschedule.
  *
  * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2251,11 +2264,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		rq = task_rq_lock(p, &flags);
 		trace_sched_wait_task(p);
 		running = task_running(rq, p);
-		on_rq = p->se.on_rq;
+		on_rq = p->on_rq;
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -2330,7 +2343,7 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 #ifdef CONFIG_SMP
 /*
- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+ * ->cpus_allowed is protected by both rq->lock and p->pi_lock
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -2363,12 +2376,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 }
 
 /*
- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-	int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -2394,27 +2407,62 @@ static void update_avg(u64 *avg, u64 sample)
 }
 #endif
 
-static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
-				 bool is_sync, bool is_migrate, bool is_local,
-				 unsigned long en_flags)
+static void
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
+#ifdef CONFIG_SCHEDSTATS
+	struct rq *rq = this_rq();
+
+#ifdef CONFIG_SMP
+	int this_cpu = smp_processor_id();
+
+	if (cpu == this_cpu) {
+		schedstat_inc(rq, ttwu_local);
+		schedstat_inc(p, se.statistics.nr_wakeups_local);
+	} else {
+		struct sched_domain *sd;
+
+		schedstat_inc(p, se.statistics.nr_wakeups_remote);
+		rcu_read_lock();
+		for_each_domain(this_cpu, sd) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+				schedstat_inc(sd, ttwu_wake_remote);
+				break;
+			}
+		}
+		rcu_read_unlock();
+	}
+#endif /* CONFIG_SMP */
+
+	schedstat_inc(rq, ttwu_count);
 	schedstat_inc(p, se.statistics.nr_wakeups);
-	if (is_sync)
+
+	if (wake_flags & WF_SYNC)
 		schedstat_inc(p, se.statistics.nr_wakeups_sync);
-	if (is_migrate)
+
+	if (cpu != task_cpu(p))
 		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-	if (is_local)
-		schedstat_inc(p, se.statistics.nr_wakeups_local);
-	else
-		schedstat_inc(p, se.statistics.nr_wakeups_remote);
 
+#endif /* CONFIG_SCHEDSTATS */
+}
+
+static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
+{
 	activate_task(rq, p, en_flags);
+	p->on_rq = 1;
+
+	/* if a worker is waking up, notify workqueue */
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_waking_up(p, cpu_of(rq));
 }
 
-static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
-					int wake_flags, bool success)
+/*
+ * Mark the task runnable and perform wakeup-preemption.
+ */
+static void
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
-	trace_sched_wakeup(p, success);
+	trace_sched_wakeup(p, true);
 	check_preempt_curr(rq, p, wake_flags);
 
 	p->state = TASK_RUNNING;
@@ -2433,9 +2481,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
 		rq->idle_stamp = 0;
 	}
 #endif
-	/* if a worker is waking up, notify workqueue */
-	if ((p->flags & PF_WQ_WORKER) && success)
-		wq_worker_waking_up(p, cpu_of(rq));
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+#ifdef CONFIG_SMP
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible--;
+#endif
+
+	ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
+	ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * Called in case the task @p isn't fully descheduled from its runqueue,
+ * in this case we must do a remote wakeup. It's a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+	struct rq *rq;
+	int ret = 0;
+
+	rq = __task_rq_lock(p);
+	if (p->on_rq) {
+		ttwu_do_wakeup(rq, p, wake_flags);
+		ret = 1;
+	}
+	__task_rq_unlock(rq);
+
+	return ret;
+}
+
+#ifdef CONFIG_SMP
+static void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	raw_spin_lock(&rq->lock);
+
+	while (list) {
+		struct task_struct *p = list;
+		list = list->wake_entry;
+		ttwu_do_activate(rq, p, 0);
+	}
+
+	raw_spin_unlock(&rq->lock);
+}
+
+void scheduler_ipi(void)
+{
+	sched_ttwu_pending();
+}
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct task_struct *next = rq->wake_list;
+
+	for (;;) {
+		struct task_struct *old = next;
+
+		p->wake_entry = next;
+		next = cmpxchg(&rq->wake_list, old, p);
+		if (next == old)
+			break;
+	}
+
+	if (!next)
+		smp_send_reschedule(cpu);
+}
+#endif
+
+static void ttwu_queue(struct task_struct *p, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+	if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+		ttwu_queue_remote(p, cpu);
+		return;
+	}
+#endif
+
+	raw_spin_lock(&rq->lock);
+	ttwu_do_activate(rq, p, 0);
+	raw_spin_unlock(&rq->lock);
 }
 
 /**
@@ -2453,92 +2591,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
  * Returns %true if @p was woken up, %false if it was already running
  * or @state didn't match @p's state.
  */
-static int try_to_wake_up(struct task_struct *p, unsigned int state,
-			  int wake_flags)
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
-	unsigned long en_flags = ENQUEUE_WAKEUP;
-	struct rq *rq;
-
-	this_cpu = get_cpu();
+	int cpu, success = 0;
 
 	smp_wmb();
-	rq = task_rq_lock(p, &flags);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
 		goto out;
 
-	if (p->se.on_rq)
-		goto out_running;
-
+	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);
-	orig_cpu = cpu;
 
-#ifdef CONFIG_SMP
-	if (unlikely(task_running(rq, p)))
-		goto out_activate;
+	if (p->on_rq && ttwu_remote(p, wake_flags))
+		goto stat;
 
+#ifdef CONFIG_SMP
 	/*
-	 * In order to handle concurrent wakeups and release the rq->lock
-	 * we put the task in TASK_WAKING state.
-	 *
-	 * First fix up the nr_uninterruptible count:
+	 * If the owning (remote) cpu is still in the middle of schedule() with
+	 * this task as prev, wait until it's done referencing the task.
 	 */
-	if (task_contributes_to_load(p)) {
-		if (likely(cpu_online(orig_cpu)))
-			rq->nr_uninterruptible--;
-		else
-			this_rq()->nr_uninterruptible--;
-	}
-	p->state = TASK_WAKING;
-
-	if (p->sched_class->task_waking) {
-		p->sched_class->task_waking(rq, p);
-		en_flags |= ENQUEUE_WAKING;
+	while (p->on_cpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+		/*
+		 * If called from interrupt context we could have landed in the
+		 * middle of schedule(), in this case we should take care not
+		 * to spin on ->on_cpu if p is current, since that would
+		 * deadlock.
+		 */
+		if (p == current) {
+			ttwu_queue(p, cpu);
+			goto stat;
+		}
+#endif
+		cpu_relax();
 	}
-
-	cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu)
-		set_task_cpu(p, cpu);
-	__task_rq_unlock(rq);
-
-	rq = cpu_rq(cpu);
-	raw_spin_lock(&rq->lock);
-
 	/*
-	 * We migrated the task without holding either rq->lock, however
-	 * since the task is not on the task list itself, nobody else
-	 * will try and migrate the task, hence the rq should match the
-	 * cpu we just moved it to.
+	 * Pairs with the smp_wmb() in finish_lock_switch().
 	 */
-	WARN_ON(task_cpu(p) != cpu);
-	WARN_ON(p->state != TASK_WAKING);
+	smp_rmb();
 
-#ifdef CONFIG_SCHEDSTATS
-	schedstat_inc(rq, ttwu_count);
-	if (cpu == this_cpu)
-		schedstat_inc(rq, ttwu_local);
-	else {
-		struct sched_domain *sd;
-		for_each_domain(this_cpu, sd) {
-			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd, ttwu_wake_remote);
-				break;
-			}
-		}
-	}
-#endif /* CONFIG_SCHEDSTATS */
+	p->sched_contributes_to_load = !!task_contributes_to_load(p);
+	p->state = TASK_WAKING;
 
-out_activate:
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(p);
+
+	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	if (task_cpu(p) != cpu)
+		set_task_cpu(p, cpu);
 #endif /* CONFIG_SMP */
-	ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
-		      cpu == this_cpu, en_flags);
-	success = 1;
-out_running:
-	ttwu_post_activation(p, rq, wake_flags, success);
+
+	ttwu_queue(p, cpu);
+stat:
+	ttwu_stat(p, cpu, wake_flags);
 out:
-	task_rq_unlock(rq, &flags);
-	put_cpu();
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	return success;
 }
@@ -2547,31 +2657,34 @@ out:
  * try_to_wake_up_local - try to wake up a local task with rq lock held
  * @p: the thread to be awakened
  *
- * Put @p on the run-queue if it's not already there.  The caller must
+ * Put @p on the run-queue if it's not already there. The caller must
  * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.  this_rq() stays locked over invocation.
+ * the current task.
  */
 static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
-	bool success = false;
 
 	BUG_ON(rq != this_rq());
 	BUG_ON(p == current);
 	lockdep_assert_held(&rq->lock);
 
+	if (!raw_spin_trylock(&p->pi_lock)) {
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&p->pi_lock);
+		raw_spin_lock(&rq->lock);
+	}
+
 	if (!(p->state & TASK_NORMAL))
-		return;
+		goto out;
 
-	if (!p->se.on_rq) {
-		if (likely(!task_running(rq, p))) {
-			schedstat_inc(rq, ttwu_count);
-			schedstat_inc(rq, ttwu_local);
-		}
-		ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
-		success = true;
-	}
-	ttwu_post_activation(p, rq, 0, success);
+	if (!p->on_rq)
+		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+	ttwu_do_wakeup(rq, p, 0);
+	ttwu_stat(p, smp_processor_id(), 0);
+out:
+	raw_spin_unlock(&p->pi_lock);
 }
 
 /**
@@ -2604,19 +2717,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
  */
 static void __sched_fork(struct task_struct *p)
 {
+	p->on_rq			= 0;
+
+	p->se.on_rq			= 0;
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
 	INIT_LIST_HEAD(&p->rt.run_list);
-	p->se.on_rq = 0;
-	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2626,8 +2741,9 @@ static void __sched_fork(struct task_struct *p)
 /*
  * fork()/clone()-time setup:
  */
-void sched_fork(struct task_struct *p, int clone_flags)
+void sched_fork(struct task_struct *p)
 {
+	unsigned long flags;
 	int cpu = get_cpu();
 
 	__sched_fork(p);
@@ -2678,16 +2794,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	 *
 	 * Silence PROVE_RCU.
 	 */
-	rcu_read_lock();
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	set_task_cpu(p, cpu);
-	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
-	p->oncpu = 0;
+#if defined(CONFIG_SMP)
+	p->on_cpu = 0;
 #endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
@@ -2707,41 +2823,31 @@ void sched_fork(struct task_struct *p, int clone_flags)
  * that must be done for every newly created context, then puts the task
  * on the runqueue and wakes it.
  */
-void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+void wake_up_new_task(struct task_struct *p)
 {
 	unsigned long flags;
 	struct rq *rq;
-	int cpu __maybe_unused = get_cpu();
 
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_SMP
-	rq = task_rq_lock(p, &flags);
-	p->state = TASK_WAKING;
-
 	/*
 	 * Fork balancing, do it here and not earlier because:
 	 *  - cpus_allowed can change in the fork path
 	 *  - any previously selected cpu might disappear through hotplug
-	 *
-	 * We set TASK_WAKING so that select_task_rq() can drop rq->lock
-	 * without people poking at ->cpus_allowed.
 	 */
-	cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
-	set_task_cpu(p, cpu);
-
-	p->state = TASK_RUNNING;
-	task_rq_unlock(rq, &flags);
+	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
 #endif
 
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 	activate_task(rq, p, 0);
-	trace_sched_wakeup_new(p, 1);
+	p->on_rq = 1;
+	trace_sched_wakeup_new(p, true);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_woken)
 		p->sched_class->task_woken(rq, p);
 #endif
-	task_rq_unlock(rq, &flags);
-	put_cpu();
+	task_rq_unlock(rq, p, &flags);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3450,27 +3556,22 @@ void sched_exec(void)
 {
 	struct task_struct *p = current;
 	unsigned long flags;
-	struct rq *rq;
 	int dest_cpu;
 
-	rq = task_rq_lock(p, &flags);
-	dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
-	/*
-	 * select_task_rq() can race against ->cpus_allowed
-	 */
-	if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-	    likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
+	if (likely(cpu_active(dest_cpu))) {
 		struct migration_arg arg = { p, dest_cpu };
 
-		task_rq_unlock(rq, &flags);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 		return;
 	}
 unlock:
-	task_rq_unlock(rq, &flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
 #endif
@@ -3507,7 +3608,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3525,7 +3626,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3549,7 +3650,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
 	rq = task_rq_lock(p, &flags);
 	thread_group_cputime(p, &totals);
 	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3903,9 +4004,6 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
- *
- * It also gets called by the fork code, when changing the parent's
- * timeslices.
  */
 void scheduler_tick(void)
 {
@@ -4025,17 +4123,11 @@ static inline void schedule_debug(struct task_struct *prev)
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
 	schedstat_inc(this_rq(), sched_count);
-#ifdef CONFIG_SCHEDSTATS
-	if (unlikely(prev->lock_depth >= 0)) {
-		schedstat_inc(this_rq(), rq_sched_info.bkl_count);
-		schedstat_inc(prev, sched_info.bkl_count);
-	}
-#endif
 }
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->se.on_rq)
+	if (prev->on_rq || rq->skip_clock_update < 0)
 		update_rq_clock(rq);
 	prev->sched_class->put_prev_task(rq, prev);
 }
@@ -4097,11 +4189,13 @@ need_resched:
 		if (unlikely(signal_pending_state(prev->state, prev))) {
 			prev->state = TASK_RUNNING;
 		} else {
+			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+			prev->on_rq = 0;
+
 			/*
-			 * If a worker is going to sleep, notify and
-			 * ask workqueue whether it wants to wake up a
-			 * task to maintain concurrency.  If so, wake
-			 * up the task.
+			 * If a worker went to sleep, notify and ask workqueue
+			 * whether it wants to wake up a task to maintain
+			 * concurrency.
 			 */
 			if (prev->flags & PF_WQ_WORKER) {
 				struct task_struct *to_wakeup;
@@ -4110,21 +4204,20 @@ need_resched:
 				if (to_wakeup)
 					try_to_wake_up_local(to_wakeup);
 			}
-			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+
+			/*
+			 * If we are going to sleep and we have plugged IO
+			 * queued, make sure to submit it to avoid deadlocks.
+			 */
+			if (blk_needs_flush_plug(prev)) {
+				raw_spin_unlock(&rq->lock);
+				blk_schedule_flush_plug(prev);
+				raw_spin_lock(&rq->lock);
+			}
 		}
 		switch_count = &prev->nvcsw;
 	}
 
-	/*
-	 * If we are going to sleep and we have plugged IO queued, make
-	 * sure to submit it to avoid deadlocks.
-	 */
-	if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
-		raw_spin_unlock(&rq->lock);
-		blk_flush_plug(prev);
-		raw_spin_lock(&rq->lock);
-	}
-
 	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
@@ -4161,70 +4254,53 @@ need_resched:
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
-{
-	unsigned int cpu;
-	struct rq *rq;
 
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
+{
+	bool ret = false;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	/*
-	 * Need to access the cpu field knowing that
-	 * DEBUG_PAGEALLOC could have unmapped it if
-	 * the mutex owner just released it and exited.
-	 */
-	if (probe_kernel_address(&owner->cpu, cpu))
-		return 0;
-#else
-	cpu = owner->cpu;
-#endif
+	rcu_read_lock();
+	if (lock->owner != owner)
+		goto fail;
 
 	/*
-	 * Even if the access succeeded (likely case),
-	 * the cpu field may no longer be valid.
+	 * Ensure we emit the owner->on_cpu dereference _after_ checking
+	 * that lock->owner still matches owner. If that check fails, owner
+	 * might point to free()d memory; if it still matches, the
+	 * rcu_read_lock() ensures the memory stays valid.
 	 */
-	if (cpu >= nr_cpumask_bits)
-		return 0;
+	barrier();
 
-	/*
-	 * We need to validate that we can do a
-	 * get_cpu() and that we have the percpu area.
-	 */
-	if (!cpu_online(cpu))
-		return 0;
+	ret = owner->on_cpu;
+fail:
+	rcu_read_unlock();
 
-	rq = cpu_rq(cpu);
+	return ret;
+}
 
-	for (;;) {
-		/*
-		 * Owner changed, break to re-assess state.
-		 */
-		if (lock->owner != owner) {
-			/*
-			 * If the lock has switched to a different owner,
-			 * we likely have heavy contention. Return 0 to quit
-			 * optimistic spinning and not contend further:
-			 */
-			if (lock->owner)
-				return 0;
-			break;
-		}
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+{
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
 
-		/*
-		 * Is that owner really running on that cpu?
-		 */
-		if (task_thread_info(rq->curr) != owner || need_resched())
+	while (owner_running(lock, owner)) {
+		if (need_resched())
 			return 0;
 
 		arch_mutex_cpu_relax();
 	}
 
+	/*
+	 * If the owner changed to another task there is likely
+	 * heavy contention, stop spinning.
+	 */
+	if (lock->owner)
+		return 0;
+
 	return 1;
 }
 #endif
@@ -4684,19 +4760,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	unsigned long flags;
 	int oldprio, on_rq, running;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
 	prev_class = p->sched_class;
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		dequeue_task(rq, p, 0);
@@ -4716,7 +4791,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	task_rq_unlock(rq, &flags);
+	__task_rq_unlock(rq);
 }
 
 #endif
@@ -4744,7 +4819,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		dequeue_task(rq, p, 0);
 
@@ -4764,7 +4839,7 @@ void set_user_nice(struct task_struct *p, long nice)
 			resched_task(rq->curr);
 	}
 out_unlock:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -4878,8 +4953,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 {
-	BUG_ON(p->se.on_rq);
-
 	p->policy = policy;
 	p->rt_priority = prio;
 	p->normal_prio = normal_prio(p);
@@ -4994,20 +5067,17 @@ recheck:
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
-	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	/*
+	 *
 	 * To be able to change p->policy safely, the appropriate
 	 * runqueue lock must be held.
 	 */
-	rq = __task_rq_lock(p);
+	rq = task_rq_lock(p, &flags);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return -EINVAL;
 	}
 
@@ -5031,8 +5101,7 @@ recheck:
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
 				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
 				!task_group_is_autogroup(task_group(p))) {
-			__task_rq_unlock(rq);
-			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			task_rq_unlock(rq, p, &flags);
 			return -EPERM;
 		}
 	}
@@ -5041,11 +5110,10 @@ recheck:
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		goto recheck;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		deactivate_task(rq, p, 0);
@@ -5064,8 +5132,7 @@ recheck:
 		activate_task(rq, p, 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	__task_rq_unlock(rq);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rt_mutex_adjust_pi(p);
 
@@ -5316,7 +5383,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
 	unsigned long flags;
-	struct rq *rq;
 	int retval;
 
 	get_online_cpus();
@@ -5331,9 +5397,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	if (retval)
 		goto out_unlock;
 
-	rq = task_rq_lock(p, &flags);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
-	task_rq_unlock(rq, &flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
 	rcu_read_unlock();
@@ -5658,7 +5724,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 
 	rq = task_rq_lock(p, &flags);
 	time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
@@ -5776,17 +5842,14 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	rcu_read_unlock();
 
 	rq->curr = rq->idle = idle;
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
-	idle->oncpu = 1;
+#if defined(CONFIG_SMP)
+	idle->on_cpu = 1;
 #endif
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT)
-	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
 	task_thread_info(idle)->preempt_count = 0;
-#endif
+
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
@@ -5881,26 +5944,17 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	unsigned int dest_cpu;
 	int ret = 0;
 
-	/*
-	 * Serialize against TASK_WAKING so that ttwu() and wunt() can
-	 * drop the rq->lock and still rely on ->cpus_allowed.
-	 */
-again:
-	while (task_is_waking(p))
-		cpu_relax();
 	rq = task_rq_lock(p, &flags);
-	if (task_is_waking(p)) {
-		task_rq_unlock(rq, &flags);
-		goto again;
-	}
+
+	if (cpumask_equal(&p->cpus_allowed, new_mask))
+		goto out;
 
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
+	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -5917,16 +5971,16 @@ again:
 		goto out;
 
 	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-	if (migrate_task(p, rq)) {
+	if (p->on_rq) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
 	}
 out:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ret;
 }
@@ -5954,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	rq_src = cpu_rq(src_cpu);
 	rq_dest = cpu_rq(dest_cpu);
 
+	raw_spin_lock(&p->pi_lock);
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
 	if (task_cpu(p) != src_cpu)
@@ -5966,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	 * If we're not on a rq, the next wake-up will ensure we're
 	 * placed properly.
 	 */
-	if (p->se.on_rq) {
+	if (p->on_rq) {
 		deactivate_task(rq_src, p, 0);
 		set_task_cpu(p, dest_cpu);
 		activate_task(rq_dest, p, 0);
@@ -5976,6 +6031,7 @@ done:
 	ret = 1;
 fail:
 	double_rq_unlock(rq_src, rq_dest);
+	raw_spin_unlock(&p->pi_lock);
 	return ret;
 }
 
@@ -6316,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DYING:
+		sched_ttwu_pending();
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
@@ -6394,6 +6451,8 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SMP
 
+static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
+
 #ifdef CONFIG_SCHED_DEBUG
 
 static __read_mostly int sched_domain_debug_enabled;
@@ -6489,7 +6548,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
-	cpumask_var_t groupmask;
 	int level = 0;
 
 	if (!sched_domain_debug_enabled)
@@ -6502,20 +6560,14 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 
 	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
 
-	if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
-		printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
-		return;
-	}
-
 	for (;;) {
-		if (sched_domain_debug_one(sd, cpu, level, groupmask))
+		if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
 			break;
 		level++;
 		sd = sd->parent;
 		if (!sd)
 			break;
 	}
-	free_cpumask_var(groupmask);
 }
 #else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6572,12 +6624,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
-static void free_rootdomain(struct root_domain *rd)
+static void free_rootdomain(struct rcu_head *rcu)
 {
-	synchronize_sched();
+	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
 
 	cpupri_cleanup(&rd->cpupri);
-
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
@@ -6618,7 +6669,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	if (old_rd)
-		free_rootdomain(old_rd);
+		call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }
 
 static int init_rootdomain(struct root_domain *rd)
@@ -6669,6 +6720,25 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_domain(struct rcu_head *rcu)
+{
+	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+	if (atomic_dec_and_test(&sd->groups->ref))
+		kfree(sd->groups);
+	kfree(sd);
+}
+
+static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+{
+	call_rcu(&sd->rcu, free_sched_domain);
+}
+
+static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+{
+	for (; sd; sd = sd->parent)
+		destroy_sched_domain(sd, cpu);
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
@@ -6679,9 +6749,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
 
-	for (tmp = sd; tmp; tmp = tmp->parent)
-		tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
-
 	/* Remove the sched domains which do not contribute to scheduling. */
 	for (tmp = sd; tmp; ) {
 		struct sched_domain *parent = tmp->parent;
@@ -6692,12 +6759,15 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 			tmp->parent = parent->parent;
 			if (parent->parent)
 				parent->parent->child = tmp;
+			destroy_sched_domain(parent, cpu);
 		} else
 			tmp = tmp->parent;
 	}
 
 	if (sd && sd_degenerate(sd)) {
+		tmp = sd;
 		sd = sd->parent;
+		destroy_sched_domain(tmp, cpu);
 		if (sd)
 			sd->child = NULL;
 	}
@@ -6705,7 +6775,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	sched_domain_debug(sd, cpu);
 
 	rq_attach_root(rq, rd);
+	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
+	destroy_sched_domains(tmp, cpu);
 }
 
 /* cpus with isolated domains */
@@ -6721,56 +6793,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-/*
- * init_sched_build_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
- * init_sched_build_groups will build a circular linked list of the groups
- * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
- */
-static void
-init_sched_build_groups(const struct cpumask *span,
-			const struct cpumask *cpu_map,
-			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
-					struct sched_group **sg,
-					struct cpumask *tmpmask),
-			struct cpumask *covered, struct cpumask *tmpmask)
-{
-	struct sched_group *first = NULL, *last = NULL;
-	int i;
-
-	cpumask_clear(covered);
-
-	for_each_cpu(i, span) {
-		struct sched_group *sg;
-		int group = group_fn(i, cpu_map, &sg, tmpmask);
-		int j;
-
-		if (cpumask_test_cpu(i, covered))
-			continue;
-
-		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
-
-		for_each_cpu(j, span) {
-			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
-		if (!first)
-			first = sg;
-		if (last)
-			last->next = sg;
-		last = sg;
-	}
-	last->next = first;
-}
-
 #define SD_NODES_PER_DOMAIN 16
 
 #ifdef CONFIG_NUMA
@@ -6787,7 +6809,7 @@ init_sched_build_groups(const struct cpumask *span,
  */
 static int find_next_best_node(int node, nodemask_t *used_nodes)
 {
-	int i, n, val, min_val, best_node = 0;
+	int i, n, val, min_val, best_node = -1;
 
 	min_val = INT_MAX;
 
@@ -6811,7 +6833,8 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
 		}
 	}
 
-	node_set(best_node, *used_nodes);
+	if (best_node != -1)
+		node_set(best_node, *used_nodes);
 	return best_node;
 }
 
@@ -6837,315 +6860,130 @@ static void sched_domain_node_span(int node, struct cpumask *span)
 
 	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
 		int next_node = find_next_best_node(node, &used_nodes);
-
+		if (next_node < 0)
+			break;
 		cpumask_or(span, span, cpumask_of_node(next_node));
 	}
 }
+
+static const struct cpumask *cpu_node_mask(int cpu)
+{
+	lockdep_assert_held(&sched_domains_mutex);
+
+	sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask);
+
+	return sched_domains_tmpmask;
+}
+
+static const struct cpumask *cpu_allnodes_mask(int cpu)
+{
+	return cpu_possible_mask;
+}
 #endif /* CONFIG_NUMA */
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+static const struct cpumask *cpu_cpu_mask(int cpu)
+{
+	return cpumask_of_node(cpu_to_node(cpu));
+}
 
-/*
- * The cpus mask in sched_group and sched_domain hangs off the end.
- *
- * ( See the the comments in include/linux/sched.h:struct sched_group
- *   and struct sched_domain. )
- */
-struct static_sched_group {
-	struct sched_group sg;
-	DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
-};
+int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
-struct static_sched_domain {
-	struct sched_domain sd;
-	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+struct sd_data {
+	struct sched_domain **__percpu sd;
+	struct sched_group **__percpu sg;
 };
 
 struct s_data {
-#ifdef CONFIG_NUMA
-	int			sd_allnodes;
-	cpumask_var_t		domainspan;
-	cpumask_var_t		covered;
-	cpumask_var_t		notcovered;
-#endif
-	cpumask_var_t		nodemask;
-	cpumask_var_t		this_sibling_map;
-	cpumask_var_t		this_core_map;
-	cpumask_var_t		this_book_map;
-	cpumask_var_t		send_covered;
-	cpumask_var_t		tmpmask;
-	struct sched_group	**sched_group_nodes;
+	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
 };
 
 enum s_alloc {
-	sa_sched_groups = 0,
 	sa_rootdomain,
-	sa_tmpmask,
-	sa_send_covered,
-	sa_this_book_map,
-	sa_this_core_map,
-	sa_this_sibling_map,
-	sa_nodemask,
-	sa_sched_group_nodes,
-#ifdef CONFIG_NUMA
-	sa_notcovered,
-	sa_covered,
-	sa_domainspan,
-#endif
+	sa_sd,
+	sa_sd_storage,
 	sa_none,
 };
 
-/*
- * SMT sched-domains:
- */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
+struct sched_domain_topology_level;
 
-static int
-cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
-		 struct sched_group **sg, struct cpumask *unused)
-{
-	if (sg)
-		*sg = &per_cpu(sched_groups, cpu).sg;
-	return cpu;
-}
-#endif /* CONFIG_SCHED_SMT */
+typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
-/*
- * multi-core sched-domains:
- */
-#ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
-
-static int
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group;
-#ifdef CONFIG_SCHED_SMT
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_core, group).sg;
-	return group;
-}
-#endif /* CONFIG_SCHED_MC */
+struct sched_domain_topology_level {
+	sched_domain_init_f init;
+	sched_domain_mask_f mask;
+	struct sd_data      data;
+};
 
 /*
- * book sched-domains:
+ * Assumes the sched_domain tree is fully constructed
  */
-#ifdef CONFIG_SCHED_BOOK
-static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
-
-static int
-cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
+static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
-	int group = cpu;
-#ifdef CONFIG_SCHED_MC
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_book, group).sg;
-	return group;
-}
-#endif /* CONFIG_SCHED_BOOK */
+	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+	struct sched_domain *child = sd->child;
 
-static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
+	if (child)
+		cpu = cpumask_first(sched_domain_span(child));
 
-static int
-cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group;
-#ifdef CONFIG_SCHED_BOOK
-	cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_MC)
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
 	if (sg)
-		*sg = &per_cpu(sched_group_phys, group).sg;
-	return group;
+		*sg = *per_cpu_ptr(sdd->sg, cpu);
+
+	return cpu;
 }
 
-#ifdef CONFIG_NUMA
 /*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
+ * build_sched_groups takes the sched_domain whose span we wish to cover and
+ * uses get_group() to identify which group (along with its sched_group) each
+ * CPU in that span belongs to. The group id returned by get_group() is a CPU
+ * number (coverage of the span is tracked with a struct cpumask).
+ *
+ * build_sched_groups will build a circular linked list of the groups
+ * covered by the domain's span, and will set each group's ->cpumask
+ * correctly, and ->cpu_power to 0.
  */
-static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static struct sched_group ***sched_group_nodes_bycpu;
-
-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
-
-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
-				 struct sched_group **sg,
-				 struct cpumask *nodemask)
-{
-	int group;
-
-	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-	group = cpumask_first(nodemask);
-
-	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group).sg;
-	return group;
-}
-
-static void init_numa_sched_groups_power(struct sched_group *group_head)
-{
-	struct sched_group *sg = group_head;
-	int j;
-
-	if (!sg)
-		return;
-	do {
-		for_each_cpu(j, sched_group_cpus(sg)) {
-			struct sched_domain *sd;
-
-			sd = &per_cpu(phys_domains, j).sd;
-			if (j != group_first_cpu(sd->groups)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-
-			sg->cpu_power += sd->groups->cpu_power;
-		}
-		sg = sg->next;
-	} while (sg != group_head);
-}
-
-static int build_numa_sched_groups(struct s_data *d,
-				   const struct cpumask *cpu_map, int num)
+static void
+build_sched_groups(struct sched_domain *sd)
 {
-	struct sched_domain *sd;
-	struct sched_group *sg, *prev;
-	int n, j;
-
-	cpumask_clear(d->covered);
-	cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
-	if (cpumask_empty(d->nodemask)) {
-		d->sched_group_nodes[num] = NULL;
-		goto out;
-	}
-
-	sched_domain_node_span(num, d->domainspan);
-	cpumask_and(d->domainspan, d->domainspan, cpu_map);
-
-	sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
-			  GFP_KERNEL, num);
-	if (!sg) {
-		printk(KERN_WARNING "Can not alloc domain group for node %d\n",
-		       num);
-		return -ENOMEM;
-	}
-	d->sched_group_nodes[num] = sg;
-
-	for_each_cpu(j, d->nodemask) {
-		sd = &per_cpu(node_domains, j).sd;
-		sd->groups = sg;
-	}
-
-	sg->cpu_power = 0;
-	cpumask_copy(sched_group_cpus(sg), d->nodemask);
-	sg->next = sg;
-	cpumask_or(d->covered, d->covered, d->nodemask);
+	struct sched_group *first = NULL, *last = NULL;
+	struct sd_data *sdd = sd->private;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered;
+	int i;
 
-	prev = sg;
-	for (j = 0; j < nr_node_ids; j++) {
-		n = (num + j) % nr_node_ids;
-		cpumask_complement(d->notcovered, d->covered);
-		cpumask_and(d->tmpmask, d->notcovered, cpu_map);
-		cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
-		if (cpumask_empty(d->tmpmask))
-			break;
-		cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
-		if (cpumask_empty(d->tmpmask))
-			continue;
-		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
-				  GFP_KERNEL, num);
-		if (!sg) {
-			printk(KERN_WARNING
-			       "Can not alloc domain group for node %d\n", j);
-			return -ENOMEM;
-		}
-		sg->cpu_power = 0;
-		cpumask_copy(sched_group_cpus(sg), d->tmpmask);
-		sg->next = prev->next;
-		cpumask_or(d->covered, d->covered, d->tmpmask);
-		prev->next = sg;
-		prev = sg;
-	}
-out:
-	return 0;
-}
-#endif /* CONFIG_NUMA */
+	lockdep_assert_held(&sched_domains_mutex);
+	covered = sched_domains_tmpmask;
 
-#ifdef CONFIG_NUMA
-/* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const struct cpumask *cpu_map,
-			      struct cpumask *nodemask)
-{
-	int cpu, i;
+	cpumask_clear(covered);
 
-	for_each_cpu(cpu, cpu_map) {
-		struct sched_group **sched_group_nodes
-			= sched_group_nodes_bycpu[cpu];
+	for_each_cpu(i, span) {
+		struct sched_group *sg;
+		int group = get_group(i, sdd, &sg);
+		int j;
 
-		if (!sched_group_nodes)
+		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		for (i = 0; i < nr_node_ids; i++) {
-			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+		cpumask_clear(sched_group_cpus(sg));
+		sg->cpu_power = 0;
 
-			cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
-			if (cpumask_empty(nodemask))
+		for_each_cpu(j, span) {
+			if (get_group(j, sdd, NULL) != group)
 				continue;
 
-			if (sg == NULL)
-				continue;
-			sg = sg->next;
-next_sg:
-			oldsg = sg;
-			sg = sg->next;
-			kfree(oldsg);
-			if (oldsg != sched_group_nodes[i])
-				goto next_sg;
+			cpumask_set_cpu(j, covered);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
-		kfree(sched_group_nodes);
-		sched_group_nodes_bycpu[cpu] = NULL;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
 	}
+	last->next = first;
 }
-#else /* !CONFIG_NUMA */
-static void free_sched_groups(const struct cpumask *cpu_map,
-			      struct cpumask *nodemask)
-{
-}
-#endif /* CONFIG_NUMA */
 
 /*
  * Initialize sched groups cpu_power.
@@ -7159,11 +6997,6 @@ static void free_sched_groups(const struct cpumask *cpu_map,
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	struct sched_domain *child;
-	struct sched_group *group;
-	long power;
-	int weight;
-
 	WARN_ON(!sd || !sd->groups);
 
 	if (cpu != group_first_cpu(sd->groups))
@@ -7171,36 +7004,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
 
-	child = sd->child;
-
-	sd->groups->cpu_power = 0;
-
-	if (!child) {
-		power = SCHED_LOAD_SCALE;
-		weight = cpumask_weight(sched_domain_span(sd));
-		/*
-		 * SMT siblings share the power of a single core.
-		 * Usually multiple threads get a better yield out of
-		 * that one core than a single thread would have,
-		 * reflect that in sd->smt_gain.
-		 */
-		if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-			power *= sd->smt_gain;
-			power /= weight;
-			power >>= SCHED_LOAD_SHIFT;
-		}
-		sd->groups->cpu_power += power;
-		return;
-	}
-
-	/*
-	 * Add cpu_power of each child group to this groups cpu_power.
-	 */
-	group = child->groups;
-	do {
-		sd->groups->cpu_power += group->cpu_power;
-		group = group->next;
-	} while (group != child->groups);
+	update_group_power(sd, cpu);
 }
 
 /*
@@ -7214,15 +7018,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 # define SD_INIT_NAME(sd, type)		do { } while (0)
 #endif
 
-#define	SD_INIT(sd, type)	sd_init_##type(sd)
-
-#define SD_INIT_FUNC(type)	\
-static noinline void sd_init_##type(struct sched_domain *sd)	\
-{								\
-	memset(sd, 0, sizeof(*sd));				\
-	*sd = SD_##type##_INIT;					\
-	sd->level = SD_LV_##type;				\
-	SD_INIT_NAME(sd, type);					\
+#define SD_INIT_FUNC(type)						\
+static noinline struct sched_domain *					\
+sd_init_##type(struct sched_domain_topology_level *tl, int cpu) 	\
+{									\
+	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
+	*sd = SD_##type##_INIT;						\
+	SD_INIT_NAME(sd, type);						\
+	sd->private = &tl->data;					\
+	return sd;							\
 }
 
 SD_INIT_FUNC(CPU)
@@ -7241,13 +7045,14 @@ SD_INIT_FUNC(CPU)
 #endif
 
 static int default_relax_domain_level = -1;
+int sched_domain_level_max;
 
 static int __init setup_relax_domain_level(char *str)
 {
 	unsigned long val;
 
 	val = simple_strtoul(str, NULL, 0);
-	if (val < SD_LV_MAX)
+	if (val < sched_domain_level_max)
 		default_relax_domain_level = val;
 
 	return 1;
@@ -7275,37 +7080,20 @@ static void set_domain_attribute(struct sched_domain *sd,
 	}
 }
 
+static void __sdt_free(const struct cpumask *cpu_map);
+static int __sdt_alloc(const struct cpumask *cpu_map);
+
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 				 const struct cpumask *cpu_map)
 {
 	switch (what) {
-	case sa_sched_groups:
-		free_sched_groups(cpu_map, d->tmpmask); /* fall through */
-		d->sched_group_nodes = NULL;
 	case sa_rootdomain:
-		free_rootdomain(d->rd); /* fall through */
-	case sa_tmpmask:
-		free_cpumask_var(d->tmpmask); /* fall through */
-	case sa_send_covered:
-		free_cpumask_var(d->send_covered); /* fall through */
-	case sa_this_book_map:
-		free_cpumask_var(d->this_book_map); /* fall through */
-	case sa_this_core_map:
-		free_cpumask_var(d->this_core_map); /* fall through */
-	case sa_this_sibling_map:
-		free_cpumask_var(d->this_sibling_map); /* fall through */
-	case sa_nodemask:
-		free_cpumask_var(d->nodemask); /* fall through */
-	case sa_sched_group_nodes:
-#ifdef CONFIG_NUMA
-		kfree(d->sched_group_nodes); /* fall through */
-	case sa_notcovered:
-		free_cpumask_var(d->notcovered); /* fall through */
-	case sa_covered:
-		free_cpumask_var(d->covered); /* fall through */
-	case sa_domainspan:
-		free_cpumask_var(d->domainspan); /* fall through */
-#endif
+		if (!atomic_read(&d->rd->refcount))
+			free_rootdomain(&d->rd->rcu); /* fall through */
+	case sa_sd:
+		free_percpu(d->sd); /* fall through */
+	case sa_sd_storage:
+		__sdt_free(cpu_map); /* fall through */
 	case sa_none:
 		break;
 	}
@@ -7314,308 +7102,212 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 						   const struct cpumask *cpu_map)
 {
-#ifdef CONFIG_NUMA
-	if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
-		return sa_none;
-	if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
-		return sa_domainspan;
-	if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
-		return sa_covered;
-	/* Allocate the per-node list of sched groups */
-	d->sched_group_nodes = kcalloc(nr_node_ids,
-				      sizeof(struct sched_group *), GFP_KERNEL);
-	if (!d->sched_group_nodes) {
-		printk(KERN_WARNING "Can not alloc sched group node list\n");
-		return sa_notcovered;
-	}
-	sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
-#endif
-	if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
-		return sa_sched_group_nodes;
-	if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL))
-		return sa_nodemask;
-	if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
-		return sa_this_sibling_map;
-	if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
-		return sa_this_core_map;
-	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
-		return sa_this_book_map;
-	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
-		return sa_send_covered;
+	memset(d, 0, sizeof(*d));
+
+	if (__sdt_alloc(cpu_map))
+		return sa_sd_storage;
+	d->sd = alloc_percpu(struct sched_domain *);
+	if (!d->sd)
+		return sa_sd_storage;
 	d->rd = alloc_rootdomain();
-	if (!d->rd) {
-		printk(KERN_WARNING "Cannot alloc root domain\n");
-		return sa_tmpmask;
-	}
+	if (!d->rd)
+		return sa_sd;
 	return sa_rootdomain;
 }
 
-static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
-	const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
+/*
+ * NULL the sd_data elements we've used to build the sched_domain and
+ * sched_group structure so that the subsequent __free_domain_allocs()
+ * will not free the data we're using.
+ */
+static void claim_allocations(int cpu, struct sched_domain *sd)
 {
-	struct sched_domain *sd = NULL;
-#ifdef CONFIG_NUMA
-	struct sched_domain *parent;
-
-	d->sd_allnodes = 0;
-	if (cpumask_weight(cpu_map) >
-	    SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
-		sd = &per_cpu(allnodes_domains, i).sd;
-		SD_INIT(sd, ALLNODES);
-		set_domain_attribute(sd, attr);
-		cpumask_copy(sched_domain_span(sd), cpu_map);
-		cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
-		d->sd_allnodes = 1;
-	}
-	parent = sd;
-
-	sd = &per_cpu(node_domains, i).sd;
-	SD_INIT(sd, NODE);
-	set_domain_attribute(sd, attr);
-	sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
-	sd->parent = parent;
-	if (parent)
-		parent->child = sd;
-	cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
-#endif
-	return sd;
-}
+	struct sd_data *sdd = sd->private;
+	struct sched_group *sg = sd->groups;
 
-static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
-	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-	struct sched_domain *parent, int i)
-{
-	struct sched_domain *sd;
-	sd = &per_cpu(phys_domains, i).sd;
-	SD_INIT(sd, CPU);
-	set_domain_attribute(sd, attr);
-	cpumask_copy(sched_domain_span(sd), d->nodemask);
-	sd->parent = parent;
-	if (parent)
-		parent->child = sd;
-	cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
-	return sd;
-}
+	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-static struct sched_domain *__build_book_sched_domain(struct s_data *d,
-	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-	struct sched_domain *parent, int i)
-{
-	struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_BOOK
-	sd = &per_cpu(book_domains, i).sd;
-	SD_INIT(sd, BOOK);
-	set_domain_attribute(sd, attr);
-	cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
-	sd->parent = parent;
-	parent->child = sd;
-	cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
-#endif
-	return sd;
+	if (cpu == cpumask_first(sched_group_cpus(sg))) {
+		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+		*per_cpu_ptr(sdd->sg, cpu) = NULL;
+	}
 }
 
-static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
-	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-	struct sched_domain *parent, int i)
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *cpu_smt_mask(int cpu)
 {
-	struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_MC
-	sd = &per_cpu(core_domains, i).sd;
-	SD_INIT(sd, MC);
-	set_domain_attribute(sd, attr);
-	cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
-	sd->parent = parent;
-	parent->child = sd;
-	cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
-#endif
-	return sd;
+	return topology_thread_cpumask(cpu);
 }
-
-static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
-	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-	struct sched_domain *parent, int i)
-{
-	struct sched_domain *sd = parent;
-#ifdef CONFIG_SCHED_SMT
-	sd = &per_cpu(cpu_domains, i).sd;
-	SD_INIT(sd, SIBLING);
-	set_domain_attribute(sd, attr);
-	cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
-	sd->parent = parent;
-	parent->child = sd;
-	cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
-	return sd;
-}
 
-static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
-			       const struct cpumask *cpu_map, int cpu)
-{
-	switch (l) {
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	case SD_LV_SIBLING: /* set up CPU (sibling) groups */
-		cpumask_and(d->this_sibling_map, cpu_map,
-			    topology_thread_cpumask(cpu));
-		if (cpu == cpumask_first(d->this_sibling_map))
-			init_sched_build_groups(d->this_sibling_map, cpu_map,
-						&cpu_to_cpu_group,
-						d->send_covered, d->tmpmask);
-		break;
+	{ sd_init_SIBLING, cpu_smt_mask, },
 #endif
 #ifdef CONFIG_SCHED_MC
-	case SD_LV_MC: /* set up multi-core groups */
-		cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu));
-		if (cpu == cpumask_first(d->this_core_map))
-			init_sched_build_groups(d->this_core_map, cpu_map,
-						&cpu_to_core_group,
-						d->send_covered, d->tmpmask);
-		break;
+	{ sd_init_MC, cpu_coregroup_mask, },
 #endif
 #ifdef CONFIG_SCHED_BOOK
-	case SD_LV_BOOK: /* set up book groups */
-		cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
-		if (cpu == cpumask_first(d->this_book_map))
-			init_sched_build_groups(d->this_book_map, cpu_map,
-						&cpu_to_book_group,
-						d->send_covered, d->tmpmask);
-		break;
+	{ sd_init_BOOK, cpu_book_mask, },
 #endif
-	case SD_LV_CPU: /* set up physical groups */
-		cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
-		if (!cpumask_empty(d->nodemask))
-			init_sched_build_groups(d->nodemask, cpu_map,
-						&cpu_to_phys_group,
-						d->send_covered, d->tmpmask);
-		break;
+	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	case SD_LV_ALLNODES:
-		init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
-					d->send_covered, d->tmpmask);
-		break;
+	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
-	default:
-		break;
+	{ NULL, },
+};
+
+static struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+static int __sdt_alloc(const struct cpumask *cpu_map)
+{
+	struct sched_domain_topology_level *tl;
+	int j;
+
+	for (tl = sched_domain_topology; tl->init; tl++) {
+		struct sd_data *sdd = &tl->data;
+
+		sdd->sd = alloc_percpu(struct sched_domain *);
+		if (!sdd->sd)
+			return -ENOMEM;	/* caller unwinds via __sdt_free() */
+
+		sdd->sg = alloc_percpu(struct sched_group *);
+		if (!sdd->sg)
+			return -ENOMEM;
+
+		for_each_cpu(j, cpu_map) {
+			struct sched_domain *sd;
+			struct sched_group *sg;
+
+			sd = kzalloc_node(sizeof(*sd) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sd)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sd, j) = sd;
+
+			sg = kzalloc_node(sizeof(*sg) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sg)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sg, j) = sg;
+		}
+	}
+
+	return 0;
+}
+
+static void __sdt_free(const struct cpumask *cpu_map)
+{
+	struct sched_domain_topology_level *tl;
+	int j;
+
+	for (tl = sched_domain_topology; tl->init; tl++) {
+		for_each_cpu(j, cpu_map) {	/* arrays may be NULL */
+			kfree(tl->data.sd ? *per_cpu_ptr(tl->data.sd, j) : NULL);
+			kfree(tl->data.sg ? *per_cpu_ptr(tl->data.sg, j) : NULL);
+		}
+		free_percpu(tl->data.sd);
+		tl->data.sd = NULL;	/* leave no stale pointer behind */
+		free_percpu(tl->data.sg);
+		tl->data.sg = NULL;
 	}
 }
 
+struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
+		struct s_data *d, const struct cpumask *cpu_map,
+		struct sched_domain_attr *attr, struct sched_domain *child,
+		int cpu)
+{
+	struct sched_domain *sd = tl->init(tl, cpu);
+	if (!sd)
+		return child;
+
+	set_domain_attribute(sd, attr);
+	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+	if (child) {
+		sd->level = child->level + 1;
+		sched_domain_level_max = max(sched_domain_level_max, sd->level);
+		child->parent = sd;
+	}
+	sd->child = child;
+
+	return sd;
+}
+
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const struct cpumask *cpu_map,
-				 struct sched_domain_attr *attr)
+static int build_sched_domains(const struct cpumask *cpu_map,
+			       struct sched_domain_attr *attr)
 {
 	enum s_alloc alloc_state = sa_none;
-	struct s_data d;
 	struct sched_domain *sd;
-	int i;
-#ifdef CONFIG_NUMA
-	d.sd_allnodes = 0;
-#endif
+	struct s_data d;
+	int i, ret = -ENOMEM;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 	if (alloc_state != sa_rootdomain)
 		goto error;
-	alloc_state = sa_sched_groups;
 
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
+	/* Set up domains for cpus specified by the cpu_map. */
 	for_each_cpu(i, cpu_map) {
-		cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
-			    cpu_map);
+		struct sched_domain_topology_level *tl;
 
-		sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
-		sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
-		sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
-		sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
-		sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
-	}
-
-	for_each_cpu(i, cpu_map) {
-		build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
-		build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
-		build_sched_groups(&d, SD_LV_MC, cpu_map, i);
-	}
-
-	/* Set up physical groups */
-	for (i = 0; i < nr_node_ids; i++)
-		build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
+		sd = NULL;
+		for (tl = sched_domain_topology; tl->init; tl++)
+			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
 
-#ifdef CONFIG_NUMA
-	/* Set up node groups */
-	if (d.sd_allnodes)
-		build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
-
-	for (i = 0; i < nr_node_ids; i++)
-		if (build_numa_sched_groups(&d, cpu_map, i))
-			goto error;
-#endif
+		while (sd->child)
+			sd = sd->child;
 
-	/* Calculate CPU power for physical packages and nodes */
-#ifdef CONFIG_SCHED_SMT
-	for_each_cpu(i, cpu_map) {
-		sd = &per_cpu(cpu_domains, i).sd;
-		init_sched_groups_power(i, sd);
-	}
-#endif
-#ifdef CONFIG_SCHED_MC
-	for_each_cpu(i, cpu_map) {
-		sd = &per_cpu(core_domains, i).sd;
-		init_sched_groups_power(i, sd);
+		*per_cpu_ptr(d.sd, i) = sd;
 	}
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	for_each_cpu(i, cpu_map) {
-		sd = &per_cpu(book_domains, i).sd;
-		init_sched_groups_power(i, sd);
-	}
-#endif
 
+	/* Build the groups for the domains */
 	for_each_cpu(i, cpu_map) {
-		sd = &per_cpu(phys_domains, i).sd;
-		init_sched_groups_power(i, sd);
-	}
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			sd->span_weight = cpumask_weight(sched_domain_span(sd));
+			get_group(i, sd->private, &sd->groups);
+			atomic_inc(&sd->groups->ref);
 
-#ifdef CONFIG_NUMA
-	for (i = 0; i < nr_node_ids; i++)
-		init_numa_sched_groups_power(d.sched_group_nodes[i]);
+			if (i != cpumask_first(sched_domain_span(sd)))
+				continue;
 
-	if (d.sd_allnodes) {
-		struct sched_group *sg;
+			build_sched_groups(sd);
+		}
+	}
 
-		cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
-								d.tmpmask);
-		init_numa_sched_groups_power(sg);
+	/* Calculate CPU power for physical packages and nodes */
+	for (i = nr_cpumask_bits-1; i >= 0; i--) {
+		if (!cpumask_test_cpu(i, cpu_map))
+			continue;
+
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			claim_allocations(i, sd);
+			init_sched_groups_power(i, sd);
+		}
 	}
-#endif
 
 	/* Attach the domains */
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i).sd;
-#elif defined(CONFIG_SCHED_MC)
-		sd = &per_cpu(core_domains, i).sd;
-#elif defined(CONFIG_SCHED_BOOK)
-		sd = &per_cpu(book_domains, i).sd;
-#else
-		sd = &per_cpu(phys_domains, i).sd;
-#endif
+		sd = *per_cpu_ptr(d.sd, i);
 		cpu_attach_domain(sd, d.rd, i);
 	}
+	rcu_read_unlock();
 
-	d.sched_group_nodes = NULL; /* don't free this we still need it */
-	__free_domain_allocs(&d, sa_tmpmask, cpu_map);
-	return 0;
-
+	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);
-	return -ENOMEM;
-}
-
-static int build_sched_domains(const struct cpumask *cpu_map)
-{
-	return __build_sched_domains(cpu_map, NULL);
+	return ret;
 }
 
 static cpumask_var_t *doms_cur;	/* current sched domains */
@@ -7670,7 +7362,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
  */
-static int arch_init_sched_domains(const struct cpumask *cpu_map)
+static int init_sched_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
@@ -7681,32 +7373,24 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
 		doms_cur = &fallback_doms;
 	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
-	err = build_sched_domains(doms_cur[0]);
+	err = build_sched_domains(doms_cur[0], NULL);
 	register_sched_domain_sysctl();
 
 	return err;
 }
 
-static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
-				       struct cpumask *tmpmask)
-{
-	free_sched_groups(cpu_map, tmpmask);
-}
-
 /*
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	/* Save because hotplug lock held. */
-	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
-	synchronize_sched();
-	arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+	rcu_read_unlock();
 }
 
 /* handle null as "default" */
@@ -7795,8 +7479,7 @@ match1:
 				goto match2;
 		}
 		/* no match - add a new doms_new */
-		__build_sched_domains(doms_new[i],
-					dattr_new ? dattr_new + i : NULL);
+		build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}
@@ -7815,7 +7498,7 @@ match2:
 }
 
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void arch_reinit_sched_domains(void)
+static void reinit_sched_domains(void)
 {
 	get_online_cpus();
 
@@ -7848,7 +7531,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 	else
 		sched_mc_power_savings = level;
 
-	arch_reinit_sched_domains();
+	reinit_sched_domains();
 
 	return count;
 }
@@ -7967,14 +7650,9 @@ void __init sched_init_smp(void)
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
-#if defined(CONFIG_NUMA)
-	sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
-								GFP_KERNEL);
-	BUG_ON(sched_group_nodes_bycpu == NULL);
-#endif
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
-	arch_init_sched_domains(cpu_active_mask);
+	init_sched_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -8281,6 +7959,7 @@ void __init sched_init(void)
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
 	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
+	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 #ifdef CONFIG_NO_HZ
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
@@ -8340,7 +8019,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 	int old_prio = p->prio;
 	int on_rq;
 
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		deactivate_task(rq, p, 0);
 	__setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8553,7 +8232,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
 	struct sched_rt_entity *rt_se;
-	struct rq *rq;
 	int i;
 
 	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8567,8 +8245,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
 
 	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-
 		rt_rq = kzalloc_node(sizeof(struct rt_rq),
 				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
@@ -8683,7 +8359,7 @@ void sched_move_task(struct task_struct *tsk)
 	rq = task_rq_lock(tsk, &flags);
 
 	running = task_current(rq, tsk);
-	on_rq = tsk->se.on_rq;
+	on_rq = tsk->on_rq;
 
 	if (on_rq)
 		dequeue_task(rq, tsk, 0);
@@ -8702,7 +8378,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (on_rq)
 		enqueue_task(rq, tsk, 0);
 
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, tsk, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 7bacd83a4158..a6710a112b4f 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_lock_irqsave(&tasklist_lock, flags);
 
 	do_each_thread(g, p) {
-		if (!p->se.on_rq || task_cpu(p) != rq_cpu)
+		if (!p->on_rq || task_cpu(p) != rq_cpu)
 			continue;
 
 		print_task(m, rq, p);
@@ -296,9 +296,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(ttwu_count);
 	P(ttwu_local);
 
-	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
-				rq->rq_sched_info.bkl_count);
-
 #undef P
 #undef P64
 #endif
@@ -441,7 +438,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.statistics.wait_count);
 	PN(se.statistics.iowait_sum);
 	P(se.statistics.iowait_count);
-	P(sched_info.bkl_count);
 	P(se.nr_migrations);
 	P(se.statistics.nr_migrations_cold);
 	P(se.statistics.nr_failed_migrations_affine);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7f00772e57c9..37f22626225e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 	}
 
 	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+#ifndef CONFIG_64BIT
+	smp_wmb();
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 /*
@@ -1340,6 +1344,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
+static void set_next_buddy(struct sched_entity *se);
+
 /*
  * The dequeue_task method is called before nr_running is
  * decreased. We remove the task from the rbtree and
@@ -1349,14 +1355,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int task_sleep = flags & DEQUEUE_SLEEP;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight)
+		if (cfs_rq->load.weight) {
+			/*
+			 * Bias pick_next to pick a task from this cfs_rq, as
+			 * p is sleeping when it is within its sched_slice.
+			 */
+			if (task_sleep && parent_entity(se))
+				set_next_buddy(parent_entity(se));
 			break;
+		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
@@ -1372,12 +1386,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+static void task_waking_fair(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 min_vruntime;
 
-	se->vruntime -= cfs_rq->min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+
+	do {
+		min_vruntime_copy = cfs_rq->min_vruntime_copy;
+		smp_rmb();
+		min_vruntime = cfs_rq->min_vruntime;
+	} while (min_vruntime != min_vruntime_copy);
+#else
+	min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+	se->vruntime -= min_vruntime;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1622,6 +1649,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
+	rcu_read_lock();
 	for_each_domain(target, sd) {
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
@@ -1641,6 +1669,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 			break;
 	}
+	rcu_read_unlock();
 
 	return target;
 }
@@ -1657,7 +1686,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -1673,6 +1702,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		new_cpu = prev_cpu;
 	}
 
+	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
 			continue;
@@ -1723,9 +1753,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
 	if (affine_sd) {
 		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-			return select_idle_sibling(p, cpu);
-		else
-			return select_idle_sibling(p, prev_cpu);
+			prev_cpu = cpu;
+
+		new_cpu = select_idle_sibling(p, prev_cpu);
+		goto unlock;
 	}
 
 	while (sd) {
@@ -1766,6 +1797,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		}
 		/* while loop will break here if sd == NULL */
 	}
+unlock:
+	rcu_read_unlock();
 
 	return new_cpu;
 }
@@ -1789,10 +1822,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 	 * This is especially important for buddies when the leftmost
 	 * task is higher priority than the buddy.
 	 */
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		gran = calc_delta_fair(gran, se);
-
-	return gran;
+	return calc_delta_fair(gran, se);
 }
 
 /*
@@ -1826,26 +1856,26 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->last = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->next = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->skip = se;
-	}
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->skip = se;
 }
 
 /*
@@ -1857,12 +1887,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
+	int next_buddy_marked = 0;
 
 	if (unlikely(se == pse))
 		return;
 
-	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
 		set_next_buddy(pse);
+		next_buddy_marked = 1;
+	}
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1890,8 +1923,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
 	BUG_ON(!pse);
-	if (wakeup_preempt_entity(se, pse) == 1)
+	if (wakeup_preempt_entity(se, pse) == 1) {
+		/*
+		 * Bias pick_next to pick the sched entity that is
+		 * triggering this preemption.
+		 */
+		if (!next_buddy_marked)
+			set_next_buddy(pse);
 		goto preempt;
+	}
 
 	return;
 
@@ -2102,23 +2142,22 @@ static unsigned long
 balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      unsigned long max_load_move, struct sched_domain *sd,
 	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
+	      struct cfs_rq *busiest_cfs_rq)
 {
-	int loops = 0, pulled = 0, pinned = 0;
+	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
 	struct task_struct *p, *n;
 
 	if (max_load_move == 0)
 		goto out;
 
-	pinned = 1;
-
 	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
 		if (loops++ > sysctl_sched_nr_migrate)
 			break;
 
 		if ((p->se.load.weight >> 1) > rem_load_move ||
-		    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned))
+		    !can_migrate_task(p, busiest, this_cpu, sd, idle,
+				      all_pinned))
 			continue;
 
 		pull_task(busiest, p, this_rq, this_cpu);
@@ -2141,9 +2180,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 */
 		if (rem_load_move <= 0)
 			break;
-
-		if (p->prio < *this_best_prio)
-			*this_best_prio = p->prio;
 	}
 out:
 	/*
@@ -2153,9 +2189,6 @@ out:
 	 */
 	schedstat_add(sd, lb_gained[idle], pulled);
 
-	if (all_pinned)
-		*all_pinned = pinned;
-
 	return max_load_move - rem_load_move;
 }
 
@@ -2206,7 +2239,7 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
 	int busiest_cpu = cpu_of(busiest);
@@ -2231,7 +2264,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		rem_load = div_u64(rem_load, busiest_h_load + 1);
 
 		moved_load = balance_tasks(this_rq, this_cpu, busiest,
-				rem_load, sd, idle, all_pinned, this_best_prio,
+				rem_load, sd, idle, all_pinned,
 				busiest_cfs_rq);
 
 		if (!moved_load)
@@ -2257,11 +2290,11 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	return balance_tasks(this_rq, this_cpu, busiest,
 			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
+			&busiest->cfs);
 }
 #endif
 
@@ -2278,12 +2311,11 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      int *all_pinned)
 {
 	unsigned long total_load_moved = 0, load_moved;
-	int this_best_prio = this_rq->curr->prio;
 
 	do {
 		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
-				sd, idle, all_pinned, &this_best_prio);
+				sd, idle, all_pinned);
 
 		total_load_moved += load_moved;
 
@@ -2652,7 +2684,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
 	 */
-	if (sd->level != SD_LV_SIBLING)
+	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
 
 	/*
@@ -3127,6 +3159,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (!sds.busiest || sds.busiest_nr_running == 0)
 		goto out_balanced;
 
+	sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
+
 	/*
 	 * If the busiest group is imbalanced the below checks don't
 	 * work because they assumes all things are equal, which typically
@@ -3151,7 +3185,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	 * Don't pull any tasks if this group is already above the domain
 	 * average load.
 	 */
-	sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
 	if (sds.this_load >= sds.avg_load)
 		goto out_balanced;
 
@@ -3340,6 +3373,7 @@ redo:
 		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
+		all_pinned = 1;
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
 		ld_moved = move_tasks(this_rq, this_cpu, busiest,
@@ -3467,6 +3501,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	raw_spin_unlock(&this_rq->lock);
 
 	update_shares(this_cpu);
+	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int balance = 1;
@@ -3488,6 +3523,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	raw_spin_lock(&this_rq->lock);
 
@@ -3536,6 +3572,7 @@ static int active_load_balance_cpu_stop(void *data)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
+	rcu_read_lock();
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
@@ -3551,6 +3588,7 @@ static int active_load_balance_cpu_stop(void *data)
 		else
 			schedstat_inc(sd, alb_failed);
 	}
+	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
 	busiest_rq->active_balance = 0;
@@ -3677,6 +3715,7 @@ static int find_new_ilb(int cpu)
 {
 	struct sched_domain *sd;
 	struct sched_group *ilb_group;
+	int ilb = nr_cpu_ids;
 
 	/*
 	 * Have idle load balancer selection from semi-idle packages only
@@ -3692,20 +3731,25 @@ static int find_new_ilb(int cpu)
 	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
 		goto out_done;
 
+	rcu_read_lock();
 	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
 		ilb_group = sd->groups;
 
 		do {
-			if (is_semi_idle_group(ilb_group))
-				return cpumask_first(nohz.grp_idle_mask);
+			if (is_semi_idle_group(ilb_group)) {
+				ilb = cpumask_first(nohz.grp_idle_mask);
+				goto unlock;
+			}
 
 			ilb_group = ilb_group->next;
 
 		} while (ilb_group != sd->groups);
 	}
+unlock:
+	rcu_read_unlock();
 
 out_done:
-	return nr_cpu_ids;
+	return ilb;
 }
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
@@ -3850,6 +3894,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	update_shares(cpu);
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
@@ -3895,6 +3940,7 @@ out:
 		if (!balance)
 			break;
 	}
+	rcu_read_unlock();
 
 	/*
 	 * next_balance will be updated only when there is a need.
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 68e69acc29b9..be40f7371ee1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1)
  * Decrement CPU power based on irq activity
  */
 SCHED_FEAT(NONIRQ_POWER, 1)
+
+/*
+ * Queue remote wakeups on the target CPU and process them
+ * using the scheduler IPI. Reduces rq->lock contention/bounces.
+ */
+SCHED_FEAT(TTWU_QUEUE, 1)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index a776a6396427..0a51882534ea 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -7,7 +7,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e7cebdc65f82..64b2a37c07d0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && \
+	     (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +296,11 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
@@ -402,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -562,6 +577,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
 				enqueue = 1;
+
+				/*
+				 * Force a clock update if the CPU was idle,
+				 * lest wakeup -> unthrottle time accumulate.
+				 */
+				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
+					rq->skip_clock_update = -1;
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
@@ -977,13 +999,23 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 {
+	struct task_struct *curr;
+	struct rq *rq;
+	int cpu;
+
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
 
+	cpu = task_cpu(p);
+	rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+
 	/*
-	 * If the current task is an RT task, then
+	 * If the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -997,21 +1029,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 	 * lock?
 	 *
 	 * For equal prio tasks, we just let the scheduler sort it out.
+	 *
+	 * Otherwise, just let it ride on the affined RQ and the
+	 * post-schedule router will push the preempted task away
+	 *
+	 * This test is optimistic, if we get it wrong the load-balancer
+	 * will have to sort it out.
 	 */
-	if (unlikely(rt_task(rq->curr)) &&
-	    (rq->curr->rt.nr_cpus_allowed < 2 ||
-	     rq->curr->prio < p->prio) &&
+	if (curr && unlikely(rt_task(curr)) &&
+	    (curr->rt.nr_cpus_allowed < 2 ||
+	     curr->prio < p->prio) &&
 	    (p->rt.nr_cpus_allowed > 1)) {
-		int cpu = find_lowest_rq(p);
+		int target = find_lowest_rq(p);
 
-		return (cpu == -1) ? task_cpu(p) : cpu;
+		if (target != -1)
+			cpu = target;
 	}
+	rcu_read_unlock();
 
-	/*
-	 * Otherwise, just let it ride on the affined RQ and the
-	 * post-schedule router will push the preempted task away
-	 */
-	return task_cpu(p);
+	return cpu;
 }
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
@@ -1136,7 +1172,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1287,7 +1323,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
-				     !task->se.on_rq)) {
+				     !task->on_rq)) {
 
 				raw_spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
@@ -1321,7 +1357,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 	BUG_ON(task_current(rq, p));
 	BUG_ON(p->rt.nr_cpus_allowed <= 1);
 
-	BUG_ON(!p->se.on_rq);
+	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
 
 	return p;
@@ -1467,7 +1503,7 @@ static int pull_rt_task(struct rq *this_rq)
 		 */
 		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
 			WARN_ON(p == src_rq->curr);
-			WARN_ON(!p->se.on_rq);
+			WARN_ON(!p->on_rq);
 
 			/*
 			 * There's a chance that p is higher in priority
@@ -1538,7 +1574,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
+	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
 		if (!task_current(rq, p)) {
@@ -1608,7 +1644,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (p->se.on_rq && !rq->rt.rt_nr_running)
+	if (p->on_rq && !rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
 
@@ -1638,7 +1674,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	 * If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (p->se.on_rq && rq->curr != p) {
+	if (p->on_rq && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (rq->rt.overloaded && push_rt_task(rq) &&
 		    /* Don't resched if we changed runqueues */
@@ -1657,7 +1693,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 static void
 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 {
-	if (!p->se.on_rq)
+	if (!p->on_rq)
 		return;
 
 	if (rq->curr == p) {
@@ -1796,10 +1832,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 1ba2bd40fdac..6f437632afab 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -9,8 +9,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct rq *rq, struct task_struct *p,
-		    int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->se.on_rq)
+	if (stop && stop->on_rq)
 		return stop;
 
 	return NULL;
diff --git a/kernel/signal.c b/kernel/signal.c
index 7165af5f1b11..ad5e818baacc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 static int recalc_sigpending_tsk(struct task_struct *t)
 {
-	if (t->signal->group_stop_count > 0 ||
+	if ((t->group_stop & GROUP_STOP_PENDING) ||
 	    PENDING(&t->pending, &t->blocked) ||
 	    PENDING(&t->signal->shared_pending, &t->blocked)) {
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
@@ -223,6 +223,83 @@ static inline void print_dropped_signal(int sig)
 				current->comm, current->pid, sig);
 }
 
+/**
+ * task_clear_group_stop_trapping - clear group stop trapping bit
+ * @task: target task
+ *
+ * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us.  Clear it
+ * and wake up the ptracer.  Note that we don't need any further locking.
+ * @task->sighand->siglock guarantees that @task->parent points to the ptracer.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ */
+static void task_clear_group_stop_trapping(struct task_struct *task)
+{
+	if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) {
+		task->group_stop &= ~GROUP_STOP_TRAPPING;
+		__wake_up_sync_key(&task->parent->signal->wait_chldexit,
+				   TASK_UNINTERRUPTIBLE, 1, task);
+	}
+}
+
+/**
+ * task_clear_group_stop_pending - clear pending group stop
+ * @task: target task
+ *
+ * Clear group stop states for @task.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ */
+void task_clear_group_stop_pending(struct task_struct *task)
+{
+	task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME |
+			      GROUP_STOP_DEQUEUED);
+}
+
+/**
+ * task_participate_group_stop - participate in a group stop
+ * @task: task participating in a group stop
+ *
+ * @task has GROUP_STOP_PENDING set and is participating in a group stop.
+ * Group stop states are cleared and the group stop count is consumed if
+ * %GROUP_STOP_CONSUME was set.  If the consumption completes the group
+ * stop, the appropriate %SIGNAL_* flags are set.
+ *
+ * CONTEXT:
+ * Must be called with @task->sighand->siglock held.
+ *
+ * RETURNS:
+ * %true if group stop completion should be notified to the parent, %false
+ * otherwise.
+ */
+static bool task_participate_group_stop(struct task_struct *task)
+{
+	struct signal_struct *sig = task->signal;
+	bool consume = task->group_stop & GROUP_STOP_CONSUME;
+
+	WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING));
+
+	task_clear_group_stop_pending(task);
+
+	if (!consume)
+		return false;
+
+	if (!WARN_ON_ONCE(sig->group_stop_count == 0))
+		sig->group_stop_count--;
+
+	/*
+	 * Tell the caller to notify completion iff we are entering into a
+	 * fresh group stop.  Read comment in do_signal_stop() for details.
+	 */
+	if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
+		sig->flags = SIGNAL_STOP_STOPPED;
+		return true;
+	}
+	return false;
+}
+
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an
@@ -527,7 +604,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		 * is to alert stop-signal processing code when another
 		 * processor has come along and cleared the flag.
 		 */
-		tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+		current->group_stop |= GROUP_STOP_DEQUEUED;
 	}
 	if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
 		/*
@@ -592,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
 	if (sigisemptyset(&m))
 		return 0;
 
-	signandsets(&s->signal, &s->signal, mask);
+	sigandnsets(&s->signal, &s->signal, mask);
 	list_for_each_entry_safe(q, n, &s->list, list) {
 		if (sigismember(mask, q->info.si_signo)) {
 			list_del_init(&q->list);
@@ -727,34 +804,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 	} else if (sig == SIGCONT) {
 		unsigned int why;
 		/*
-		 * Remove all stop signals from all queues,
-		 * and wake all threads.
+		 * Remove all stop signals from all queues, wake all threads.
 		 */
 		rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
 		t = p;
 		do {
-			unsigned int state;
+			task_clear_group_stop_pending(t);
 			rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-			/*
-			 * If there is a handler for SIGCONT, we must make
-			 * sure that no thread returns to user mode before
-			 * we post the signal, in case it was the only
-			 * thread eligible to run the signal handler--then
-			 * it must not do anything between resuming and
-			 * running the handler.  With the TIF_SIGPENDING
-			 * flag set, the thread will pause and acquire the
-			 * siglock that we hold now and until we've queued
-			 * the pending signal.
-			 *
-			 * Wake up the stopped thread _after_ setting
-			 * TIF_SIGPENDING
-			 */
-			state = __TASK_STOPPED;
-			if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
-				set_tsk_thread_flag(t, TIF_SIGPENDING);
-				state |= TASK_INTERRUPTIBLE;
-			}
-			wake_up_state(t, state);
+			wake_up_state(t, __TASK_STOPPED);
 		} while_each_thread(p, t);
 
 		/*
@@ -780,13 +837,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 			signal->flags = why | SIGNAL_STOP_CONTINUED;
 			signal->group_stop_count = 0;
 			signal->group_exit_code = 0;
-		} else {
-			/*
-			 * We are not stopped, but there could be a stop
-			 * signal in the middle of being processed after
-			 * being removed from the queue.  Clear that too.
-			 */
-			signal->flags &= ~SIGNAL_STOP_DEQUEUED;
 		}
 	}
 
@@ -875,6 +925,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
 			signal->group_stop_count = 0;
 			t = p;
 			do {
+				task_clear_group_stop_pending(t);
 				sigaddset(&t->pending.signal, SIGKILL);
 				signal_wake_up(t, 1);
 			} while_each_thread(p, t);
@@ -1109,6 +1160,7 @@ int zap_other_threads(struct task_struct *p)
 	p->signal->group_stop_count = 0;
 
 	while_each_thread(p, t) {
+		task_clear_group_stop_pending(t);
 		count++;
 
 		/* Don't bother with already dead threads */
@@ -1536,16 +1588,30 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	return ret;
 }
 
-static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
+/**
+ * do_notify_parent_cldstop - notify parent of stopped/continued state change
+ * @tsk: task reporting the state change
+ * @for_ptracer: the notification is for ptracer
+ * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report
+ *
+ * Notify @tsk's parent that the stopped/continued state has changed.  If
+ * @for_ptracer is %false, @tsk's group leader notifies its real parent.
+ * If %true, @tsk reports to @tsk->parent which should be the ptracer.
+ *
+ * CONTEXT:
+ * Must be called with tasklist_lock at least read locked.
+ */
+static void do_notify_parent_cldstop(struct task_struct *tsk,
+				     bool for_ptracer, int why)
 {
 	struct siginfo info;
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
 
-	if (task_ptrace(tsk))
+	if (for_ptracer) {
 		parent = tsk->parent;
-	else {
+	} else {
 		tsk = tsk->group_leader;
 		parent = tsk->real_parent;
 	}
@@ -1621,6 +1687,15 @@ static int sigkill_pending(struct task_struct *tsk)
 }
 
 /*
+ * Test whether the target task of the usual cldstop notification - the
+ * real_parent of @child - is in the same group as the ptracer.
+ */
+static bool real_parent_is_ptracer(struct task_struct *child)
+{
+	return same_thread_group(child->parent, child->real_parent);
+}
+
+/*
  * This must be called with current->sighand->siglock held.
  *
  * This should be the path for all ptrace stops.
@@ -1631,10 +1706,12 @@ static int sigkill_pending(struct task_struct *tsk)
  * If we actually decide not to stop at all because the tracer
  * is gone, we keep current->exit_code unless clear_code.
  */
-static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
+static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 	__releases(&current->sighand->siglock)
 	__acquires(&current->sighand->siglock)
 {
+	bool gstop_done = false;
+
 	if (arch_ptrace_stop_needed(exit_code, info)) {
 		/*
 		 * The arch code has something special to do before a
@@ -1655,21 +1732,49 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 	}
 
 	/*
-	 * If there is a group stop in progress,
-	 * we must participate in the bookkeeping.
+	 * If @why is CLD_STOPPED, we're trapping to participate in a group
+	 * stop.  Do the bookkeeping.  Note that if SIGCONT was delivered
+	 * while siglock was released for the arch hook, PENDING could be
+	 * clear now.  We act as if SIGCONT is received after TASK_TRACED
+	 * is entered - ignore it.
 	 */
-	if (current->signal->group_stop_count > 0)
-		--current->signal->group_stop_count;
+	if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING))
+		gstop_done = task_participate_group_stop(current);
 
 	current->last_siginfo = info;
 	current->exit_code = exit_code;
 
-	/* Let the debugger run.  */
-	__set_current_state(TASK_TRACED);
+	/*
+	 * TRACED should be visible before TRAPPING is cleared; otherwise,
+	 * the tracer might fail do_wait().
+	 */
+	set_current_state(TASK_TRACED);
+
+	/*
+	 * We're committing to trapping.  Clearing GROUP_STOP_TRAPPING and
+	 * transition to TASK_TRACED should be atomic with respect to
+	 * siglock.  This should be done after the arch hook as siglock is
+	 * released and regrabbed across it.
+	 */
+	task_clear_group_stop_trapping(current);
+
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
 	if (may_ptrace_stop()) {
-		do_notify_parent_cldstop(current, CLD_TRAPPED);
+		/*
+		 * Notify parents of the stop.
+		 *
+		 * While ptraced, there are two parents - the ptracer and
+		 * the real_parent of the group_leader.  The ptracer should
+		 * know about every stop while the real parent is only
+		 * interested in the completion of group stop.  The states
+		 * for the two don't interact with each other.  Notify
+		 * separately unless they're gonna be duplicates.
+		 */
+		do_notify_parent_cldstop(current, true, why);
+		if (gstop_done && !real_parent_is_ptracer(current))
+			do_notify_parent_cldstop(current, false, why);
+
 		/*
 		 * Don't want to allow preemption here, because
 		 * sys_ptrace() needs this task to be inactive.
@@ -1684,7 +1789,16 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
 		/*
 		 * By the time we got the lock, our tracer went away.
 		 * Don't drop the lock yet, another tracer may come.
+		 *
+		 * If @gstop_done, the ptracer went away between group stop
+		 * completion and here.  During detach, it would have set
+		 * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED
+		 * in do_signal_stop() on return, so notifying the real
+		 * parent of the group stop completion is enough.
 		 */
+		if (gstop_done)
+			do_notify_parent_cldstop(current, false, why);
+
 		__set_current_state(TASK_RUNNING);
 		if (clear_code)
 			current->exit_code = 0;
@@ -1728,7 +1842,7 @@ void ptrace_notify(int exit_code)
 
 	/* Let the debugger run.  */
 	spin_lock_irq(&current->sighand->siglock);
-	ptrace_stop(exit_code, 1, &info);
+	ptrace_stop(exit_code, CLD_TRAPPED, 1, &info);
 	spin_unlock_irq(&current->sighand->siglock);
 }
 
@@ -1741,66 +1855,115 @@ void ptrace_notify(int exit_code)
 static int do_signal_stop(int signr)
 {
 	struct signal_struct *sig = current->signal;
-	int notify;
 
-	if (!sig->group_stop_count) {
+	if (!(current->group_stop & GROUP_STOP_PENDING)) {
+		unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME;
 		struct task_struct *t;
 
-		if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
+		/* signr will be recorded in task->group_stop for retries */
+		WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK);
+
+		if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) ||
 		    unlikely(signal_group_exit(sig)))
 			return 0;
 		/*
-		 * There is no group stop already in progress.
-		 * We must initiate one now.
+		 * There is no group stop already in progress.  We must
+		 * initiate one now.
+		 *
+		 * While ptraced, a task may be resumed while group stop is
+		 * still in effect and then receive a stop signal and
+		 * initiate another group stop.  This deviates from the
+		 * usual behavior as two consecutive stop signals can't
+		 * cause two group stops when !ptraced.  That is why we
+		 * also check !task_is_stopped(t) below.
+		 *
+		 * The condition can be distinguished by testing whether
+		 * SIGNAL_STOP_STOPPED is already set.  Don't generate
+		 * group_exit_code in such case.
+		 *
+		 * This is not necessary for SIGNAL_STOP_CONTINUED because
+		 * an intervening stop signal is required to cause two
+		 * continued events regardless of ptrace.
 		 */
-		sig->group_exit_code = signr;
+		if (!(sig->flags & SIGNAL_STOP_STOPPED))
+			sig->group_exit_code = signr;
+		else
+			WARN_ON_ONCE(!task_ptrace(current));
 
+		current->group_stop &= ~GROUP_STOP_SIGMASK;
+		current->group_stop |= signr | gstop;
 		sig->group_stop_count = 1;
-		for (t = next_thread(current); t != current; t = next_thread(t))
+		for (t = next_thread(current); t != current;
+		     t = next_thread(t)) {
+			t->group_stop &= ~GROUP_STOP_SIGMASK;
 			/*
 			 * Setting state to TASK_STOPPED for a group
 			 * stop is always done with the siglock held,
 			 * so this check has no races.
 			 */
-			if (!(t->flags & PF_EXITING) &&
-			    !task_is_stopped_or_traced(t)) {
+			if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) {
+				t->group_stop |= signr | gstop;
 				sig->group_stop_count++;
 				signal_wake_up(t, 0);
 			}
+		}
 	}
-	/*
-	 * If there are no other threads in the group, or if there is
-	 * a group stop in progress and we are the last to stop, report
-	 * to the parent.  When ptraced, every thread reports itself.
-	 */
-	notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
-	notify = tracehook_notify_jctl(notify, CLD_STOPPED);
-	/*
-	 * tracehook_notify_jctl() can drop and reacquire siglock, so
-	 * we keep ->group_stop_count != 0 before the call. If SIGCONT
-	 * or SIGKILL comes in between ->group_stop_count == 0.
-	 */
-	if (sig->group_stop_count) {
-		if (!--sig->group_stop_count)
-			sig->flags = SIGNAL_STOP_STOPPED;
-		current->exit_code = sig->group_exit_code;
+retry:
+	if (likely(!task_ptrace(current))) {
+		int notify = 0;
+
+		/*
+		 * If there are no other threads in the group, or if there
+		 * is a group stop in progress and we are the last to stop,
+		 * report to the parent.
+		 */
+		if (task_participate_group_stop(current))
+			notify = CLD_STOPPED;
+
 		__set_current_state(TASK_STOPPED);
+		spin_unlock_irq(&current->sighand->siglock);
+
+		/*
+		 * Notify the parent of the group stop completion.  Because
+		 * we're not holding either the siglock or tasklist_lock
+		 * here, ptracer may attach in between; however, this is for
+		 * group stop and should always be delivered to the real
+		 * parent of the group leader.  The new ptracer will get
+		 * its notification when this task transitions into
+		 * TASK_TRACED.
+		 */
+		if (notify) {
+			read_lock(&tasklist_lock);
+			do_notify_parent_cldstop(current, false, notify);
+			read_unlock(&tasklist_lock);
+		}
+
+		/* Now we don't run again until woken by SIGCONT or SIGKILL */
+		schedule();
+
+		spin_lock_irq(&current->sighand->siglock);
+	} else {
+		ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK,
+			    CLD_STOPPED, 0, NULL);
+		current->exit_code = 0;
 	}
-	spin_unlock_irq(&current->sighand->siglock);
 
-	if (notify) {
-		read_lock(&tasklist_lock);
-		do_notify_parent_cldstop(current, notify);
-		read_unlock(&tasklist_lock);
+	/*
+	 * GROUP_STOP_PENDING could be set if another group stop has
+	 * started since being woken up or ptrace wants us to transit
+	 * between TASK_STOPPED and TRACED.  Retry group stop.
+	 */
+	if (current->group_stop & GROUP_STOP_PENDING) {
+		WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK));
+		goto retry;
 	}
 
-	/* Now we don't run again until woken by SIGCONT or SIGKILL */
-	do {
-		schedule();
-	} while (try_to_freeze());
+	/* PTRACE_ATTACH might have raced with task killing, clear trapping */
+	task_clear_group_stop_trapping(current);
+
+	spin_unlock_irq(&current->sighand->siglock);
 
 	tracehook_finish_jctl();
-	current->exit_code = 0;
 
 	return 1;
 }
@@ -1814,7 +1977,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
 	ptrace_signal_deliver(regs, cookie);
 
 	/* Let the debugger run.  */
-	ptrace_stop(signr, 0, info);
+	ptrace_stop(signr, CLD_TRAPPED, 0, info);
 
 	/* We're back.  Did the debugger cancel the sig?  */
 	signr = current->exit_code;
@@ -1869,18 +2032,36 @@ relock:
 	 * the CLD_ si_code into SIGNAL_CLD_MASK bits.
 	 */
 	if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
-		int why = (signal->flags & SIGNAL_STOP_CONTINUED)
-				? CLD_CONTINUED : CLD_STOPPED;
+		struct task_struct *leader;
+		int why;
+
+		if (signal->flags & SIGNAL_CLD_CONTINUED)
+			why = CLD_CONTINUED;
+		else
+			why = CLD_STOPPED;
+
 		signal->flags &= ~SIGNAL_CLD_MASK;
 
-		why = tracehook_notify_jctl(why, CLD_CONTINUED);
 		spin_unlock_irq(&sighand->siglock);
 
-		if (why) {
-			read_lock(&tasklist_lock);
-			do_notify_parent_cldstop(current->group_leader, why);
-			read_unlock(&tasklist_lock);
-		}
+		/*
+		 * Notify the parent that we're continuing.  This event is
+		 * always per-process and doesn't make a whole lot of sense
+		 * for ptracers, who shouldn't consume the state via
+		 * wait(2) either, but, for backward compatibility, notify
+		 * the ptracer of the group leader too unless it's gonna be
+		 * a duplicate.
+		 */
+		read_lock(&tasklist_lock);
+
+		do_notify_parent_cldstop(current, false, why);
+
+		leader = current->group_leader;
+		if (task_ptrace(leader) && !real_parent_is_ptracer(leader))
+			do_notify_parent_cldstop(leader, true, why);
+
+		read_unlock(&tasklist_lock);
+
 		goto relock;
 	}
 
@@ -1897,8 +2078,8 @@ relock:
 		if (unlikely(signr != 0))
 			ka = return_ka;
 		else {
-			if (unlikely(signal->group_stop_count > 0) &&
-			    do_signal_stop(0))
+			if (unlikely(current->group_stop &
+				     GROUP_STOP_PENDING) && do_signal_stop(0))
 				goto relock;
 
 			signr = dequeue_signal(current, &current->blocked,
@@ -2017,10 +2198,42 @@ relock:
 	return signr;
 }
 
+/*
+ * It could be that complete_signal() picked us to notify about the
+ * group-wide signal. Other threads should be notified now to take
+ * the shared signals in @which since we will not.
+ */
+static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
+{
+	sigset_t retarget;
+	struct task_struct *t;
+
+	sigandsets(&retarget, &tsk->signal->shared_pending.signal, which);
+	if (sigisemptyset(&retarget))
+		return;
+
+	t = tsk;
+	while_each_thread(tsk, t) {
+		if (t->flags & PF_EXITING)
+			continue;
+
+		if (!has_pending_signals(&retarget, &t->blocked))
+			continue;
+		/* Remove the signals this thread can handle. */
+		sigandsets(&retarget, &retarget, &t->blocked);
+
+		if (!signal_pending(t))
+			signal_wake_up(t, 0);
+
+		if (sigisemptyset(&retarget))
+			break;
+	}
+}
+
 void exit_signals(struct task_struct *tsk)
 {
 	int group_stop = 0;
-	struct task_struct *t;
+	sigset_t unblocked;
 
 	if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
 		tsk->flags |= PF_EXITING;
@@ -2036,26 +2249,23 @@ void exit_signals(struct task_struct *tsk)
 	if (!signal_pending(tsk))
 		goto out;
 
-	/*
-	 * It could be that __group_complete_signal() choose us to
-	 * notify about group-wide signal. Another thread should be
-	 * woken now to take the signal since we will not.
-	 */
-	for (t = tsk; (t = next_thread(t)) != tsk; )
-		if (!signal_pending(t) && !(t->flags & PF_EXITING))
-			recalc_sigpending_and_wake(t);
+	unblocked = tsk->blocked;
+	signotset(&unblocked);
+	retarget_shared_pending(tsk, &unblocked);
 
-	if (unlikely(tsk->signal->group_stop_count) &&
-			!--tsk->signal->group_stop_count) {
-		tsk->signal->flags = SIGNAL_STOP_STOPPED;
-		group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
-	}
+	if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) &&
+	    task_participate_group_stop(tsk))
+		group_stop = CLD_STOPPED;
 out:
 	spin_unlock_irq(&tsk->sighand->siglock);
 
+	/*
+	 * If group stop has completed, deliver the notification.  This
+	 * should always go to the real parent of the group leader.
+	 */
 	if (unlikely(group_stop)) {
 		read_lock(&tasklist_lock);
-		do_notify_parent_cldstop(tsk, group_stop);
+		do_notify_parent_cldstop(tsk, false, group_stop);
 		read_unlock(&tasklist_lock);
 	}
 }
@@ -2089,11 +2299,33 @@ long do_no_restart_syscall(struct restart_block *param)
 	return -EINTR;
 }
 
-/*
- * We don't need to get the kernel lock - this is all local to this
- * particular thread.. (and that's good, because this is _heavily_
- * used by various programs)
+static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
+{
+	if (signal_pending(tsk) && !thread_group_empty(tsk)) {
+		sigset_t newblocked;
+		/* A set of now blocked but previously unblocked signals. */
+		sigandnsets(&newblocked, newset, &current->blocked);
+		retarget_shared_pending(tsk, &newblocked);
+	}
+	tsk->blocked = *newset;
+	recalc_sigpending();
+}
+
+/**
+ * set_current_blocked - change current->blocked mask
+ * @newset: new mask
+ *
+ * It is wrong to change ->blocked directly, this helper should be used
+ * to ensure the process can't miss a shared signal we are going to block.
  */
+void set_current_blocked(const sigset_t *newset)
+{
+	struct task_struct *tsk = current;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	__set_task_blocked(tsk, newset);
+	spin_unlock_irq(&tsk->sighand->siglock);
+}
 
 /*
  * This is also useful for kernel threads that want to temporarily
@@ -2105,30 +2337,29 @@ long do_no_restart_syscall(struct restart_block *param)
  */
 int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
 {
-	int error;
+	struct task_struct *tsk = current;
+	sigset_t newset;
 
-	spin_lock_irq(&current->sighand->siglock);
+	/* Lockless, only current can change ->blocked, never from irq */
 	if (oldset)
-		*oldset = current->blocked;
+		*oldset = tsk->blocked;
 
-	error = 0;
 	switch (how) {
 	case SIG_BLOCK:
-		sigorsets(&current->blocked, &current->blocked, set);
+		sigorsets(&newset, &tsk->blocked, set);
 		break;
 	case SIG_UNBLOCK:
-		signandsets(&current->blocked, &current->blocked, set);
+		sigandnsets(&newset, &tsk->blocked, set);
 		break;
 	case SIG_SETMASK:
-		current->blocked = *set;
+		newset = *set;
 		break;
 	default:
-		error = -EINVAL;
+		return -EINVAL;
 	}
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
 
-	return error;
+	set_current_blocked(&newset);
+	return 0;
 }
 
 /**
@@ -2138,40 +2369,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
  *  @oset: previous value of signal mask if non-null
  *  @sigsetsize: size of sigset_t type
  */
-SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
+SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
 		sigset_t __user *, oset, size_t, sigsetsize)
 {
-	int error = -EINVAL;
 	sigset_t old_set, new_set;
+	int error;
 
 	/* XXX: Don't preclude handling different sized sigset_t's.  */
 	if (sigsetsize != sizeof(sigset_t))
-		goto out;
+		return -EINVAL;
 
-	if (set) {
-		error = -EFAULT;
-		if (copy_from_user(&new_set, set, sizeof(*set)))
-			goto out;
+	old_set = current->blocked;
+
+	if (nset) {
+		if (copy_from_user(&new_set, nset, sizeof(sigset_t)))
+			return -EFAULT;
 		sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
 
-		error = sigprocmask(how, &new_set, &old_set);
+		error = sigprocmask(how, &new_set, NULL);
 		if (error)
-			goto out;
-		if (oset)
-			goto set_old;
-	} else if (oset) {
-		spin_lock_irq(&current->sighand->siglock);
-		old_set = current->blocked;
-		spin_unlock_irq(&current->sighand->siglock);
+			return error;
+	}
 
-	set_old:
-		error = -EFAULT;
-		if (copy_to_user(oset, &old_set, sizeof(*oset)))
-			goto out;
+	if (oset) {
+		if (copy_to_user(oset, &old_set, sizeof(sigset_t)))
+			return -EFAULT;
 	}
-	error = 0;
-out:
-	return error;
+
+	return 0;
 }
 
 long do_sigpending(void __user *set, unsigned long sigsetsize)
@@ -2284,6 +2509,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 #endif
 
 /**
+ *  do_sigtimedwait - wait for queued signals specified in @which
+ *  @which: queued signals to wait for
+ *  @info: if non-null, the signal's siginfo is returned here
+ *  @ts: upper bound on process time suspension
+ */
+int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
+			const struct timespec *ts)
+{
+	struct task_struct *tsk = current;
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	sigset_t mask = *which;
+	int sig;
+
+	if (ts) {
+		if (!timespec_valid(ts))
+			return -EINVAL;
+		timeout = timespec_to_jiffies(ts);
+		/*
+		 * We can be close to the next tick, add another one
+		 * to ensure we will wait at least the time asked for.
+		 */
+		if (ts->tv_sec || ts->tv_nsec)
+			timeout++;
+	}
+
+	/*
+	 * Invert the set of allowed signals to get those we want to block.
+	 */
+	sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+	signotset(&mask);
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	sig = dequeue_signal(tsk, &mask, info);
+	if (!sig && timeout) {
+		/*
+		 * None ready, temporarily unblock those we're interested
+		 * while we are sleeping in so that we'll be awakened when
+		 * they arrive. Unblocking is always fine, we can avoid
+		 * set_current_blocked().
+		 */
+		tsk->real_blocked = tsk->blocked;
+		sigandsets(&tsk->blocked, &tsk->blocked, &mask);
+		recalc_sigpending();
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		timeout = schedule_timeout_interruptible(timeout);
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		__set_task_blocked(tsk, &tsk->real_blocked);
+		siginitset(&tsk->real_blocked, 0);
+		sig = dequeue_signal(tsk, &mask, info);
+	}
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	if (sig)
+		return sig;
+	return timeout ? -EINTR : -EAGAIN;
+}
+
+/**
  *  sys_rt_sigtimedwait - synchronously wait for queued signals specified
  *			in @uthese
  *  @uthese: queued signals to wait for
@@ -2295,11 +2580,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 		siginfo_t __user *, uinfo, const struct timespec __user *, uts,
 		size_t, sigsetsize)
 {
-	int ret, sig;
 	sigset_t these;
 	struct timespec ts;
 	siginfo_t info;
-	long timeout = 0;
+	int ret;
 
 	/* XXX: Don't preclude handling different sized sigset_t's.  */
 	if (sigsetsize != sizeof(sigset_t))
@@ -2308,61 +2592,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 	if (copy_from_user(&these, uthese, sizeof(these)))
 		return -EFAULT;
 
-	/*
-	 * Invert the set of allowed signals to get those we
-	 * want to block.
-	 */
-	sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
-	signotset(&these);
-
 	if (uts) {
 		if (copy_from_user(&ts, uts, sizeof(ts)))
 			return -EFAULT;
-		if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
-		    || ts.tv_sec < 0)
-			return -EINVAL;
 	}
 
-	spin_lock_irq(&current->sighand->siglock);
-	sig = dequeue_signal(current, &these, &info);
-	if (!sig) {
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (uts)
-			timeout = (timespec_to_jiffies(&ts)
-				   + (ts.tv_sec || ts.tv_nsec));
-
-		if (timeout) {
-			/*
-			 * None ready -- temporarily unblock those we're
-			 * interested while we are sleeping in so that we'll
-			 * be awakened when they arrive.
-			 */
-			current->real_blocked = current->blocked;
-			sigandsets(&current->blocked, &current->blocked, &these);
-			recalc_sigpending();
-			spin_unlock_irq(&current->sighand->siglock);
-
-			timeout = schedule_timeout_interruptible(timeout);
-
-			spin_lock_irq(&current->sighand->siglock);
-			sig = dequeue_signal(current, &these, &info);
-			current->blocked = current->real_blocked;
-			siginitset(&current->real_blocked, 0);
-			recalc_sigpending();
-		}
-	}
-	spin_unlock_irq(&current->sighand->siglock);
+	ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL);
 
-	if (sig) {
-		ret = sig;
-		if (uinfo) {
-			if (copy_siginfo_to_user(uinfo, &info))
-				ret = -EFAULT;
-		}
-	} else {
-		ret = -EAGAIN;
-		if (timeout)
-			ret = -EINTR;
+	if (ret > 0 && uinfo) {
+		if (copy_siginfo_to_user(uinfo, &info))
+			ret = -EFAULT;
 	}
 
 	return ret;
@@ -2650,60 +2889,51 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
 /**
  *  sys_sigprocmask - examine and change blocked signals
  *  @how: whether to add, remove, or set signals
- *  @set: signals to add or remove (if non-null)
+ *  @nset: signals to add or remove (if non-null)
  *  @oset: previous value of signal mask if non-null
  *
  * Some platforms have their own version with special arguments;
  * others support only sys_rt_sigprocmask.
  */
 
-SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
+SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
 		old_sigset_t __user *, oset)
 {
-	int error;
 	old_sigset_t old_set, new_set;
+	sigset_t new_blocked;
 
-	if (set) {
-		error = -EFAULT;
-		if (copy_from_user(&new_set, set, sizeof(*set)))
-			goto out;
+	old_set = current->blocked.sig[0];
+
+	if (nset) {
+		if (copy_from_user(&new_set, nset, sizeof(*nset)))
+			return -EFAULT;
 		new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
 
-		spin_lock_irq(&current->sighand->siglock);
-		old_set = current->blocked.sig[0];
+		new_blocked = current->blocked;
 
-		error = 0;
 		switch (how) {
-		default:
-			error = -EINVAL;
-			break;
 		case SIG_BLOCK:
-			sigaddsetmask(&current->blocked, new_set);
+			sigaddsetmask(&new_blocked, new_set);
 			break;
 		case SIG_UNBLOCK:
-			sigdelsetmask(&current->blocked, new_set);
+			sigdelsetmask(&new_blocked, new_set);
 			break;
 		case SIG_SETMASK:
-			current->blocked.sig[0] = new_set;
+			new_blocked.sig[0] = new_set;
 			break;
+		default:
+			return -EINVAL;
 		}
 
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-		if (error)
-			goto out;
-		if (oset)
-			goto set_old;
-	} else if (oset) {
-		old_set = current->blocked.sig[0];
-	set_old:
-		error = -EFAULT;
+		set_current_blocked(&new_blocked);
+	}
+
+	if (oset) {
 		if (copy_to_user(oset, &old_set, sizeof(*oset)))
-			goto out;
+			return -EFAULT;
 	}
-	error = 0;
-out:
-	return error;
+
+	return 0;
 }
 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 174f976c2874..13960170cad4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
 char *softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
-	"TASKLET", "SCHED", "HRTIMER",	"RCU"
+	"TASKLET", "SCHED", "HRTIMER"
 };
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index af468edf096a..e4128b278f23 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -314,8 +314,8 @@ void kernel_restart_prepare(char *cmd)
 {
 	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 	system_state = SYSTEM_RESTART;
+	usermodehelper_disable();
 	device_shutdown();
-	sysdev_shutdown();
 	syscore_shutdown();
 }
 
@@ -344,6 +344,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 	blocking_notifier_call_chain(&reboot_notifier_list,
 		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
 	system_state = state;
+	usermodehelper_disable();
 	device_shutdown();
 }
 /**
@@ -354,7 +355,6 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
-	sysdev_shutdown();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
@@ -374,7 +374,6 @@ void kernel_power_off(void)
 	if (pm_power_off_prepare)
 		pm_power_off_prepare();
 	disable_nonboot_cpus();
-	sysdev_shutdown();
 	syscore_shutdown();
 	printk(KERN_EMERG "Power down.\n");
 	kmsg_dump(KMSG_DUMP_POWEROFF);
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index b0425991e9ac..e2fd74b8e8c2 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,5 +1,5 @@
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
-obj-y += timeconv.o posix-clock.o
+obj-y += timeconv.o posix-clock.o alarmtimer.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
new file mode 100644
index 000000000000..9265014cb4db
--- /dev/null
+++ b/kernel/time/alarmtimer.c
@@ -0,0 +1,694 @@
+/*
+ * Alarmtimer interface
+ *
+ * This interface provides a timer which is similar to hrtimers,
+ * but triggers a RTC alarm if the box is suspended.
+ *
+ * This interface is influenced by the Android RTC Alarm timer
+ * interface.
+ *
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * Author: John Stultz <john.stultz@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/time.h>
+#include <linux/hrtimer.h>
+#include <linux/timerqueue.h>
+#include <linux/rtc.h>
+#include <linux/alarmtimer.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/posix-timers.h>
+#include <linux/workqueue.h>
+#include <linux/freezer.h>
+
+/**
+ * struct alarm_base - Alarm timer bases
+ * @lock:		Lock for synchronized access to the base
+ * @timerqueue:		Timerqueue head managing the list of events
+ * @timer: 		hrtimer used to schedule events while running
+ * @gettime:		Function to read the time correlating to the base
+ * @base_clockid:	clockid for the base
+ */
+static struct alarm_base {
+	spinlock_t		lock;
+	struct timerqueue_head	timerqueue;
+	struct hrtimer		timer;
+	ktime_t			(*gettime)(void);
+	clockid_t		base_clockid;
+} alarm_bases[ALARM_NUMTYPE];
+
+#ifdef CONFIG_RTC_CLASS
+/* rtc timer and device for setting alarm wakeups at suspend */
+static struct rtc_timer		rtctimer;
+static struct rtc_device	*rtcdev;
+#endif
+
+/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */
+static ktime_t freezer_delta;
+static DEFINE_SPINLOCK(freezer_delta_lock);
+
+
+/**
+ * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
+ * @base: pointer to the base where the timer is being run
+ * @alarm: pointer to alarm being enqueued.
+ *
+ * Adds alarm to an alarm_base timerqueue and if necessary sets
+ * an hrtimer to run.
+ *
+ * Must hold base->lock when calling.
+ */
+static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
+{
+	timerqueue_add(&base->timerqueue, &alarm->node);
+	if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
+		hrtimer_try_to_cancel(&base->timer);
+		hrtimer_start(&base->timer, alarm->node.expires,
+				HRTIMER_MODE_ABS);
+	}
+}
+
+/**
+ * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue
+ * @base: pointer to the base where the timer is running
+ * @alarm: pointer to alarm being removed
+ *
+ * Removes alarm from an alarm_base timerqueue and if necessary sets
+ * a new timer to run.
+ *
+ * Must hold base->lock when calling.
+ */
+static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
+{
+	struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
+
+	timerqueue_del(&base->timerqueue, &alarm->node);
+	if (next == &alarm->node) {
+		hrtimer_try_to_cancel(&base->timer);
+		next = timerqueue_getnext(&base->timerqueue);
+		if (!next)
+			return;
+		hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
+	}
+}
+
+
+/**
+ * alarmtimer_fired - Handles alarm hrtimer being fired.
+ * @timer: pointer to hrtimer being run
+ *
+ * When an alarm timer fires, this runs through the timerqueue to
+ * see which alarms expired, and runs those. If there are more alarm
+ * timers queued for the future, we set the hrtimer to fire when
+ * the next future alarm timer expires. Returns HRTIMER_RESTART in
+ * that case and HRTIMER_NORESTART when the queue is empty.
+ */
+static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
+{
+	struct alarm_base *base = container_of(timer, struct alarm_base, timer);
+	struct timerqueue_node *next;
+	unsigned long flags;
+	ktime_t now;
+	int ret = HRTIMER_NORESTART;
+
+	spin_lock_irqsave(&base->lock, flags);
+	now = base->gettime();
+	while ((next = timerqueue_getnext(&base->timerqueue))) {
+		struct alarm *alarm;
+		ktime_t expired = next->expires;
+
+		/* An alarm set for exactly "now" is due and must fire */
+		if (expired.tv64 > now.tv64)
+			break;
+
+		alarm = container_of(next, struct alarm, node);
+		timerqueue_del(&base->timerqueue, &alarm->node);
+		alarm->enabled = 0;
+		/* Re-add periodic timers */
+		if (alarm->period.tv64) {
+			alarm->node.expires = ktime_add(expired, alarm->period);
+			timerqueue_add(&base->timerqueue, &alarm->node);
+			alarm->enabled = 1;
+		}
+		spin_unlock_irqrestore(&base->lock, flags);
+		if (alarm->function)
+			alarm->function(alarm);
+		spin_lock_irqsave(&base->lock, flags);
+	}
+
+	if (next) {
+		hrtimer_set_expires(&base->timer, next->expires);
+		ret = HRTIMER_RESTART;
+	}
+	spin_unlock_irqrestore(&base->lock, flags);
+
+	return ret;
+}
+
+#ifdef CONFIG_RTC_CLASS
+/**
+ * alarmtimer_suspend - Suspend time callback
+ * @dev: unused
+ * @state: unused
+ *
+ * When we are going into suspend, we look through the bases
+ * to see which is the soonest timer to expire. We then
+ * set an rtc timer to fire that far into the future, which
+ * will wake us from suspend.
+ */
+static int alarmtimer_suspend(struct device *dev)
+{
+	struct rtc_time tm;
+	ktime_t min, now;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&freezer_delta_lock, flags);
+	min = freezer_delta;
+	freezer_delta = ktime_set(0, 0);
+	spin_unlock_irqrestore(&freezer_delta_lock, flags);
+
+	/* If we have no rtcdev, just return */
+	if (!rtcdev)
+		return 0;
+
+	/* Find the soonest timer to expire*/
+	for (i = 0; i < ALARM_NUMTYPE; i++) {
+		struct alarm_base *base = &alarm_bases[i];
+		struct timerqueue_node *next;
+		ktime_t delta;
+
+		spin_lock_irqsave(&base->lock, flags);
+		next = timerqueue_getnext(&base->timerqueue);
+		spin_unlock_irqrestore(&base->lock, flags);
+		if (!next)
+			continue;
+		delta = ktime_sub(next->expires, base->gettime());
+		if (!min.tv64 || (delta.tv64 < min.tv64))
+			min = delta;
+	}
+	if (min.tv64 == 0)
+		return 0;
+
+	/* XXX - Should we enforce a minimum sleep time? */
+	WARN_ON(min.tv64 < NSEC_PER_SEC);
+
+	/* Setup an rtc timer to fire that far in the future */
+	rtc_timer_cancel(rtcdev, &rtctimer);
+	rtc_read_time(rtcdev, &tm);
+	now = rtc_tm_to_ktime(tm);
+	now = ktime_add(now, min);
+
+	rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0));
+
+	return 0;
+}
+#else
+static int alarmtimer_suspend(struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
+{
+	ktime_t delta;
+	unsigned long flags;
+	struct alarm_base *base = &alarm_bases[type];
+
+	delta = ktime_sub(absexp, base->gettime());
+
+	spin_lock_irqsave(&freezer_delta_lock, flags);
+	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64))
+		freezer_delta = delta;
+	spin_unlock_irqrestore(&freezer_delta_lock, flags);
+}
+
+
+/**
+ * alarm_init - Initialize an alarm structure
+ * @alarm: ptr to alarm to be initialized
+ * @type: the type of the alarm
+ * @function: callback that is run when the alarm fires
+ */
+void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
+		void (*function)(struct alarm *))
+{
+	timerqueue_init(&alarm->node);
+	alarm->period = ktime_set(0, 0);
+	alarm->function = function;
+	alarm->type = type;
+	alarm->enabled = 0;
+}
+
+/**
+ * alarm_start - Sets an alarm to fire
+ * @alarm: ptr to alarm to set
+ * @start: time to run the alarm
+ * @period: period at which the alarm will recur
+ */
+void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+	unsigned long flags;
+
+	spin_lock_irqsave(&base->lock, flags);
+	if (alarm->enabled)
+		alarmtimer_remove(base, alarm);
+	alarm->node.expires = start;
+	alarm->period = period;
+	alarmtimer_enqueue(base, alarm);
+	alarm->enabled = 1;
+	spin_unlock_irqrestore(&base->lock, flags);
+}
+
+/**
+ * alarm_cancel - Tries to cancel an alarm timer
+ * @alarm: ptr to alarm to be canceled
+ */
+void alarm_cancel(struct alarm *alarm)
+{
+	struct alarm_base *base = &alarm_bases[alarm->type];
+	unsigned long flags;
+
+	spin_lock_irqsave(&base->lock, flags);
+	if (alarm->enabled)
+		alarmtimer_remove(base, alarm);
+	alarm->enabled = 0;
+	spin_unlock_irqrestore(&base->lock, flags);
+}
+
+
+/**
+ * clock2alarm - helper that converts from clockid to alarmtypes
+ * @clockid: clockid.
+ */
+static enum alarmtimer_type clock2alarm(clockid_t clockid)
+{
+	if (clockid == CLOCK_REALTIME_ALARM)
+		return ALARM_REALTIME;
+	if (clockid == CLOCK_BOOTTIME_ALARM)
+		return ALARM_BOOTTIME;
+	return -1;
+}
+
+/**
+ * alarm_handle_timer - Callback for posix timers
+ * @alarm: alarm that fired
+ *
+ * Posix timer callback for expired alarm timers.
+ */
+static void alarm_handle_timer(struct alarm *alarm)
+{
+	struct k_itimer *ptr = container_of(alarm, struct k_itimer,
+						it.alarmtimer);
+	if (posix_timer_event(ptr, 0) != 0)
+		ptr->it_overrun++;
+}
+
+/**
+ * alarm_clock_getres - posix getres interface
+ * @which_clock: clockid
+ * @tp: timespec to fill
+ *
+ * Returns the granularity of underlying alarm base clock
+ */
+static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
+{
+	clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
+
+	return hrtimer_get_res(baseid, tp);
+}
+
+/**
+ * alarm_clock_get - posix clock_get interface
+ * @which_clock: clockid
+ * @tp: timespec to fill.
+ *
+ * Provides the underlying alarm base time.
+ */
+static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
+{
+	struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)];
+
+	*tp = ktime_to_timespec(base->gettime());
+	return 0;
+}
+
+/**
+ * alarm_timer_create - posix timer_create interface
+ * @new_timer: k_itimer pointer to manage
+ *
+ * Initializes the k_itimer structure.
+ */
+static int alarm_timer_create(struct k_itimer *new_timer)
+{
+	enum  alarmtimer_type type;
+	struct alarm_base *base;
+
+	if (!capable(CAP_WAKE_ALARM))
+		return -EPERM;
+
+	type = clock2alarm(new_timer->it_clock);
+	base = &alarm_bases[type];
+	alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer);
+	return 0;
+}
+
+/**
+ * alarm_timer_get - posix timer_get interface
+ * @timr: k_itimer pointer
+ * @cur_setting: itimerspec data to fill
+ *
+ * Copies the itimerspec data out from the k_itimer
+ */
+static void alarm_timer_get(struct k_itimer *timr,
+				struct itimerspec *cur_setting)
+{
+	cur_setting->it_interval =
+			ktime_to_timespec(timr->it.alarmtimer.period);
+	cur_setting->it_value =
+			ktime_to_timespec(timr->it.alarmtimer.node.expires);
+	return;
+}
+
+/**
+ * alarm_timer_del - posix timer_del interface
+ * @timr: k_itimer pointer to be deleted
+ *
+ * Cancels any programmed alarms for the given timer.
+ */
+static int alarm_timer_del(struct k_itimer *timr)
+{
+	alarm_cancel(&timr->it.alarmtimer);
+	return 0;
+}
+
+/**
+ * alarm_timer_set - posix timer_set interface
+ * @timr: k_itimer pointer to be set
+ * @flags: timer flags
+ * @new_setting: itimerspec to be used
+ * @old_setting: itimerspec being replaced, may be NULL
+ *
+ * Saves the previous setting into @old_setting when the caller
+ * supplied one, then sets the timer to new_setting and starts it.
+ * Always returns 0.
+ */
+static int alarm_timer_set(struct k_itimer *timr, int flags,
+				struct itimerspec *new_setting,
+				struct itimerspec *old_setting)
+{
+	/* Save old values; NULL when the caller does not want them */
+	if (old_setting)
+		alarm_timer_get(timr, old_setting);
+
+	/* If the timer was already set, cancel it */
+	alarm_cancel(&timr->it.alarmtimer);
+
+	/* start the timer */
+	alarm_start(&timr->it.alarmtimer,
+			timespec_to_ktime(new_setting->it_value),
+			timespec_to_ktime(new_setting->it_interval));
+	return 0;
+}
+
+/**
+ * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep
+ * @alarm: ptr to alarm that fired
+ *
+ * Wakes up the task that set the alarmtimer
+ */
+static void alarmtimer_nsleep_wakeup(struct alarm *alarm)
+{
+	struct task_struct *task = (struct task_struct *)alarm->data;
+
+	alarm->data = NULL;
+	if (task)
+		wake_up_process(task);
+}
+
+/**
+ * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation
+ * @alarm: ptr to alarmtimer
+ * @absexp: absolute expiration time
+ *
+ * Sets the alarm timer and sleeps until it is fired or interrupted.
+ */
+static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp)
+{
+	alarm->data = (void *)current;
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		alarm_start(alarm, absexp, ktime_set(0, 0));
+		if (likely(alarm->data))
+			schedule();
+
+		alarm_cancel(alarm);
+	} while (alarm->data && !signal_pending(current));
+
+	__set_current_state(TASK_RUNNING);
+
+	return (alarm->data == NULL);
+}
+
+
+/**
+ * update_rmtp - Update remaining timespec value
+ * @exp: expiration time
+ * @type: timer type
+ * @rmtp: user pointer to remaining timespec value
+ *
+ * Helper function that fills in rmtp value with time between
+ * now and the exp value
+ */
+static int update_rmtp(ktime_t exp, enum  alarmtimer_type type,
+			struct timespec __user *rmtp)
+{
+	struct timespec rmt;
+	ktime_t rem;
+
+	rem = ktime_sub(exp, alarm_bases[type].gettime());
+
+	if (rem.tv64 <= 0)
+		return 0;
+	rmt = ktime_to_timespec(rem);
+
+	if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
+		return -EFAULT;
+
+	return 1;
+
+}
+
+/**
+ * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep
+ * @restart: ptr to restart block
+ *
+ * Handles restarted clock_nanosleep calls
+ */
+static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
+{
+	enum  alarmtimer_type type = restart->nanosleep.index;
+	ktime_t exp;
+	struct timespec __user  *rmtp;
+	struct alarm alarm;
+	int ret = 0;
+
+	exp.tv64 = restart->nanosleep.expires;
+	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
+
+	if (alarmtimer_do_nsleep(&alarm, exp))
+		goto out;
+
+	if (freezing(current))
+		alarmtimer_freezerset(exp, type);
+
+	rmtp = restart->nanosleep.rmtp;
+	if (rmtp) {
+		ret = update_rmtp(exp, type, rmtp);
+		if (ret <= 0)
+			goto out;
+	}
+
+
+	/* The other values in restart are already filled in */
+	ret = -ERESTART_RESTARTBLOCK;
+out:
+	return ret;
+}
+
+/**
+ * alarm_timer_nsleep - alarmtimer nanosleep
+ * @which_clock: clockid
+ * @flags: determines abstime or relative
+ * @tsreq: requested sleep time (abs or rel)
+ * @rmtp: remaining sleep time saved
+ *
+ * Handles clock_nanosleep calls against _ALARM clockids
+ */
+static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
+		     struct timespec *tsreq, struct timespec __user *rmtp)
+{
+	enum  alarmtimer_type type = clock2alarm(which_clock);
+	struct alarm alarm;
+	ktime_t exp;
+	int ret = 0;
+	struct restart_block *restart;
+
+	if (!capable(CAP_WAKE_ALARM))
+		return -EPERM;
+
+	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
+
+	exp = timespec_to_ktime(*tsreq);
+	/* Convert (if necessary) to absolute time */
+	if (flags != TIMER_ABSTIME) {
+		ktime_t now = alarm_bases[type].gettime();
+		exp = ktime_add(now, exp);
+	}
+
+	if (alarmtimer_do_nsleep(&alarm, exp))
+		goto out;
+
+	if (freezing(current))
+		alarmtimer_freezerset(exp, type);
+
+	/* abs timers don't set remaining time or restart */
+	if (flags == TIMER_ABSTIME) {
+		ret = -ERESTARTNOHAND;
+		goto out;
+	}
+
+	if (rmtp) {
+		ret = update_rmtp(exp, type, rmtp);
+		if (ret <= 0)
+			goto out;
+	}
+
+	restart = &current_thread_info()->restart_block;
+	restart->fn = alarm_timer_nsleep_restart;
+	restart->nanosleep.index = type;
+	restart->nanosleep.expires = exp.tv64;
+	restart->nanosleep.rmtp = rmtp;
+	ret = -ERESTART_RESTARTBLOCK;
+
+out:
+	return ret;
+}
+
+
+/* Suspend hook structures */
+static const struct dev_pm_ops alarmtimer_pm_ops = {
+	.suspend = alarmtimer_suspend,
+};
+
+static struct platform_driver alarmtimer_driver = {
+	.driver = {
+		.name = "alarmtimer",
+		.pm = &alarmtimer_pm_ops,
+	}
+};
+
+/**
+ * alarmtimer_init - Initialize alarm timer code
+ *
+ * This function initializes the alarm bases and registers
+ * the posix clock ids.
+ */
+static int __init alarmtimer_init(void)
+{
+	int error = 0;
+	int i;
+	struct k_clock alarm_clock = {
+		.clock_getres	= alarm_clock_getres,
+		.clock_get	= alarm_clock_get,
+		.timer_create	= alarm_timer_create,
+		.timer_set	= alarm_timer_set,
+		.timer_del	= alarm_timer_del,
+		.timer_get	= alarm_timer_get,
+		.nsleep		= alarm_timer_nsleep,
+	};
+
+	posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
+	posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
+
+	/* Initialize alarm bases */
+	alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
+	alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
+	alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
+	alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime;
+	for (i = 0; i < ALARM_NUMTYPE; i++) {
+		timerqueue_init_head(&alarm_bases[i].timerqueue);
+		spin_lock_init(&alarm_bases[i].lock);
+		hrtimer_init(&alarm_bases[i].timer,
+				alarm_bases[i].base_clockid,
+				HRTIMER_MODE_ABS);
+		alarm_bases[i].timer.function = alarmtimer_fired;
+	}
+	error = platform_driver_register(&alarmtimer_driver);
+	platform_device_register_simple("alarmtimer", -1, NULL, 0);
+
+	return error;
+}
+device_initcall(alarmtimer_init);
+
+#ifdef CONFIG_RTC_CLASS
+/**
+ * has_wakealarm - check rtc device has wakealarm ability
+ * @dev: current device
+ * @name_ptr: name to be returned
+ *
+ * This helper function checks to see if the rtc device can wake
+ * from suspend.
+ */
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+	struct rtc_device *candidate = to_rtc_device(dev);
+
+	if (!candidate->ops->set_alarm)
+		return 0;
+	if (!device_may_wakeup(candidate->dev.parent))
+		return 0;
+
+	*(const char **)name_ptr = dev_name(dev);
+	return 1;
+}
+
+/**
+ * alarmtimer_init_late - Late initializing of alarmtimer code
+ *
+ * This function locates a rtc device to use for wakealarms.
+ * Run as late_initcall to make sure rtc devices have been
+ * registered.
+ */
+static int __init alarmtimer_init_late(void)
+{
+	struct device *dev;
+	char *str = NULL;
+
+	/* Find a wakeup-capable rtc; has_wakealarm() fills str on a match */
+	dev = class_find_device(rtc_class, NULL, &str, has_wakealarm);
+	if (dev) {
+		rtcdev = rtc_class_open(str);
+		put_device(dev);	/* drop class_find_device()'s reference */
+	}
+	if (!rtcdev)
+		printk(KERN_WARNING "No RTC device found, ALARM timers will not wake from suspend\n");
+	rtc_timer_init(&rtctimer, NULL, NULL);
+	return 0;
+}
+#else
+/* Without CONFIG_RTC_CLASS nothing can wake us from suspend; warn at boot. */
+static int __init alarmtimer_init_late(void)
+{
+	printk(KERN_WARNING "Kernel not built with RTC support, ALARM timers will not wake from suspend\n");
+	return 0;
+}
+#endif
+late_initcall(alarmtimer_init_late);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 0d74b9ba90c8..c027d4f602f1 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -194,6 +194,70 @@ void clockevents_register_device(struct clock_event_device *dev)
 }
 EXPORT_SYMBOL_GPL(clockevents_register_device);
 
+static void clockevents_config(struct clock_event_device *dev,
+			       u32 freq)
+{
+	u64 sec;
+
+	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+		return;
+
+	/*
+	 * Calculate the maximum number of seconds we can sleep. Limit
+	 * to 10 minutes for hardware which can program more than
+	 * 32bit ticks so we still get reasonable conversion values.
+	 */
+	sec = dev->max_delta_ticks;
+	do_div(sec, freq);
+	if (!sec)
+		sec = 1;
+	else if (sec > 600 && dev->max_delta_ticks > UINT_MAX)
+		sec = 600;
+
+	clockevents_calc_mult_shift(dev, freq, sec);
+	dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev);
+	dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
+}
+
+/**
+ * clockevents_config_and_register - Configure and register a clock event device
+ * @dev:	device to register
+ * @freq:	The clock frequency
+ * @min_delta:	The minimum clock ticks to program in oneshot mode
+ * @max_delta:	The maximum clock ticks to program in oneshot mode
+ *
+ * min/max_delta can be 0 for devices which do not support oneshot mode.
+ */
+void clockevents_config_and_register(struct clock_event_device *dev,
+				     u32 freq, unsigned long min_delta,
+				     unsigned long max_delta)
+{
+	dev->min_delta_ticks = min_delta;
+	dev->max_delta_ticks = max_delta;
+	clockevents_config(dev, freq);
+	clockevents_register_device(dev);
+}
+
+/**
+ * clockevents_update_freq - Update frequency and reprogram a clock event device.
+ * @dev:	device to modify
+ * @freq:	new device frequency
+ *
+ * Reconfigure and reprogram a clock event device in oneshot
+ * mode. Must be called on the cpu for which the device delivers per
+ * cpu timer events with interrupts disabled!  Returns 0 on success,
+ * -ETIME when the event is in the past.
+ */
+int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
+{
+	clockevents_config(dev, freq);
+
+	if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
+		return 0;
+
+	return clockevents_program_event(dev, dev->next_event, ktime_get());
+}
+
 /*
  * Noop handler when we shut down an event device
  */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 6519cf62d9cd..1c95fd677328 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -626,19 +626,6 @@ static void clocksource_enqueue(struct clocksource *cs)
 	list_add(&cs->list, entry);
 }
 
-
-/*
- * Maximum time we expect to go between ticks. This includes idle
- * tickless time. It provides the trade off between selecting a
- * mult/shift pair that is very precise but can only handle a short
- * period of time, vs. a mult/shift pair that can handle long periods
- * of time but isn't as precise.
- *
- * This is a subsystem constant, and actual hardware limitations
- * may override it (ie: clocksources that wrap every 3 seconds).
- */
-#define MAX_UPDATE_LENGTH 5 /* Seconds */
-
 /**
  * __clocksource_updatefreq_scale - Used update clocksource with new freq
  * @t:		clocksource to be registered
@@ -652,15 +639,28 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
+	u64 sec;
+
 	/*
-	 * Ideally we want to use  some of the limits used in
-	 * clocksource_max_deferment, to provide a more informed
-	 * MAX_UPDATE_LENGTH. But for now this just gets the
-	 * register interface working properly.
+	 * Calc the maximum number of seconds which we can run before
+	 * wrapping around. For clocksources which have a mask > 32bit
+	 * we need to limit the max sleep time to have a good
+	 * conversion precision. 10 minutes is still a reasonable
+	 * amount. That results in a shift value of 24 for a
+	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
+	 * ~ 0.06ppm granularity for NTP. We keep a 1/32 (mask >> 5,
+	 * ~3.1%) margin; compare clocksource_max_deferment()
 	 */
+	sec = (cs->mask - (cs->mask >> 5));
+	do_div(sec, freq);
+	do_div(sec, scale);
+	if (!sec)
+		sec = 1;
+	else if (sec > 600 && cs->mask > UINT_MAX)
+		sec = 600;
+
 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-				      NSEC_PER_SEC/scale,
-				      MAX_UPDATE_LENGTH*scale);
+			       NSEC_PER_SEC / scale, sec * scale);
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
@@ -685,8 +685,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 	/* Add clocksource to the clcoksource list */
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
-	clocksource_select();
 	clocksource_enqueue_watchdog(cs);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -706,8 +706,8 @@ int clocksource_register(struct clocksource *cs)
 
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
-	clocksource_select();
 	clocksource_enqueue_watchdog(cs);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 25028dd4fa18..c340ca658f37 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -19,7 +19,6 @@
  */
 #include <linux/device.h>
 #include <linux/file.h>
-#include <linux/mutex.h>
 #include <linux/posix-clock.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
@@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp)
 {
 	struct posix_clock *clk = fp->private_data;
 
-	mutex_lock(&clk->mutex);
+	down_read(&clk->rwsem);
 
 	if (!clk->zombie)
 		return clk;
 
-	mutex_unlock(&clk->mutex);
+	up_read(&clk->rwsem);
 
 	return NULL;
 }
 
 static void put_posix_clock(struct posix_clock *clk)
 {
-	mutex_unlock(&clk->mutex);
+	up_read(&clk->rwsem);
 }
 
 static ssize_t posix_clock_read(struct file *fp, char __user *buf,
@@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
 	struct posix_clock *clk =
 		container_of(inode->i_cdev, struct posix_clock, cdev);
 
-	mutex_lock(&clk->mutex);
+	down_read(&clk->rwsem);
 
 	if (clk->zombie) {
 		err = -ENODEV;
@@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
 		fp->private_data = clk;
 	}
 out:
-	mutex_unlock(&clk->mutex);
+	up_read(&clk->rwsem);
 	return err;
 }
 
@@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid)
 	int err;
 
 	kref_init(&clk->kref);
-	mutex_init(&clk->mutex);
+	init_rwsem(&clk->rwsem);
 
 	cdev_init(&clk->cdev, &posix_clock_file_operations);
 	clk->cdev.owner = clk->ops.owner;
 	err = cdev_add(&clk->cdev, devid, 1);
-	if (err)
-		goto no_cdev;
 
 	return err;
-no_cdev:
-	mutex_destroy(&clk->mutex);
-	return err;
 }
 EXPORT_SYMBOL_GPL(posix_clock_register);
 
 static void delete_clock(struct kref *kref)
 {
 	struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
-	mutex_destroy(&clk->mutex);
+
 	if (clk->release)
 		clk->release(clk);
 }
@@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk)
 {
 	cdev_del(&clk->cdev);
 
-	mutex_lock(&clk->mutex);
+	down_write(&clk->rwsem);
 	clk->zombie = true;
-	mutex_unlock(&clk->mutex);
+	up_write(&clk->rwsem);
 
 	kref_put(&clk->kref, delete_clock);
 }
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index da800ffa810c..723c7637e55a 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -522,10 +522,11 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
  */
 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
+	int cpu = smp_processor_id();
+
 	/* Set it up only once ! */
 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
-		int cpu = smp_processor_id();
 
 		bc->event_handler = tick_handle_oneshot_broadcast;
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
@@ -551,6 +552,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 			tick_broadcast_set_event(tick_next_period, 1);
 		} else
 			bc->next_event.tv64 = KTIME_MAX;
+	} else {
+		/*
+		 * The first cpu which switches to oneshot mode sets
+		 * the bit for all other cpus which are in the general
+		 * (periodic) broadcast mask. So the bit is set and
+		 * would prevent the first broadcast enter after this
+		 * from programming the bc device.
+		 */
+		tick_broadcast_clear_oneshot(cpu);
 	}
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8ad5d576755e..8e6a05a5915a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -596,6 +596,58 @@ void __init timekeeping_init(void)
 static struct timespec timekeeping_suspend_time;
 
 /**
+ * __timekeeping_inject_sleeptime - Internal function to add sleep interval
+ * @delta: pointer to a timespec delta value
+ *
+ * Takes a timespec offset measuring a suspend interval and properly
+ * adds the sleep offset to the timekeeping variables.
+ */
+static void __timekeeping_inject_sleeptime(struct timespec *delta)
+{
+	xtime = timespec_add(xtime, *delta);
+	wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
+	total_sleep_time = timespec_add(total_sleep_time, *delta);
+}
+
+
+/**
+ * timekeeping_inject_sleeptime - Adds suspend interval to timekeeping values
+ * @delta: pointer to a timespec delta value
+ *
+ * This hook is for architectures that cannot support read_persistent_clock
+ * because their RTC/persistent clock is only accessible when irqs are enabled.
+ *
+ * This function should only be called by rtc_resume(), and allows
+ * a suspend offset to be injected into the timekeeping values.
+ */
+void timekeeping_inject_sleeptime(struct timespec *delta)
+{
+	unsigned long flags;
+	struct timespec ts;
+
+	/* Make sure we don't set the clock twice */
+	read_persistent_clock(&ts);
+	if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
+		return;
+
+	write_seqlock_irqsave(&xtime_lock, flags);
+	timekeeping_forward_now();
+
+	__timekeeping_inject_sleeptime(delta);
+
+	timekeeper.ntp_error = 0;
+	ntp_clear();
+	update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+				timekeeper.mult);
+
+	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	/* signal hrtimers about time change */
+	clock_was_set();
+}
+
+
+/**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  *
  * This is for the generic clocksource timekeeping.
@@ -615,9 +667,7 @@ static void timekeeping_resume(void)
 
 	if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
 		ts = timespec_sub(ts, timekeeping_suspend_time);
-		xtime = timespec_add(xtime, ts);
-		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
-		total_sleep_time = timespec_add(total_sleep_time, ts);
+		__timekeeping_inject_sleeptime(&ts);
 	}
 	/* re-base the last cycle value */
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61d7d59f4a1a..2ad39e556cb4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,7 @@ if FTRACE
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
 	depends on HAVE_FUNCTION_TRACER
-	select FRAME_POINTER if !ARM_UNWIND && !S390
+	select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE
 	select KALLSYMS
 	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7aa40f8e182d..6957aa298dfa 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -850,29 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
 		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
 }
 
-static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
+static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
+				    unsigned int depth, bool explicit)
 {
 	struct blk_trace *bt = q->blk_trace;
 
 	if (bt) {
-		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
-		__be64 rpdu = cpu_to_be64(pdu);
+		__be64 rpdu = cpu_to_be64(depth);
+		u32 what;
 
-		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
-				sizeof(rpdu), &rpdu);
-	}
-}
-
-static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (bt) {
-		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
-		__be64 rpdu = cpu_to_be64(pdu);
+		if (explicit)
+			what = BLK_TA_UNPLUG_IO;
+		else
+			what = BLK_TA_UNPLUG_TIMER;
 
-		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
-				sizeof(rpdu), &rpdu);
+		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
 	}
 }
 
@@ -1015,9 +1007,7 @@ static void blk_register_tracepoints(void)
 	WARN_ON(ret);
 	ret = register_trace_block_plug(blk_add_trace_plug, NULL);
 	WARN_ON(ret);
-	ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
-	WARN_ON(ret);
-	ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
+	ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
 	WARN_ON(ret);
 	ret = register_trace_block_split(blk_add_trace_split, NULL);
 	WARN_ON(ret);
@@ -1032,8 +1022,7 @@ static void blk_unregister_tracepoints(void)
 	unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
 	unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
 	unregister_trace_block_split(blk_add_trace_split, NULL);
-	unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
-	unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
+	unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
 	unregister_trace_block_plug(blk_add_trace_plug, NULL);
 	unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
 	unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ee24fa1935ac..d017c2c82c44 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -39,20 +39,26 @@
 #include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
-	do {					\
-		if (WARN_ON(cond))		\
+	({					\
+		int ___r = cond;		\
+		if (WARN_ON(___r))		\
 			ftrace_kill();		\
-	} while (0)
+		___r;				\
+	})
 
 #define FTRACE_WARN_ON_ONCE(cond)		\
-	do {					\
-		if (WARN_ON_ONCE(cond))		\
+	({					\
+		int ___r = cond;		\
+		if (WARN_ON_ONCE(___r))		\
 			ftrace_kill();		\
-	} while (0)
+		___r;				\
+	})
 
 /* hash bits for specific function selection */
 #define FTRACE_HASH_BITS 7
 #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
+#define FTRACE_HASH_DEFAULT_BITS 10
+#define FTRACE_HASH_MAX_BITS 12
 
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
@@ -81,23 +87,29 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
 	.func		= ftrace_stub,
 };
 
-static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
+static struct ftrace_ops global_ops;
+
+static void
+ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
 
 /*
- * Traverse the ftrace_list, invoking all entries.  The reason that we
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
  * can use rcu_dereference_raw() is that elements removed from this list
  * are simply leaked, so there is no need to interact with a grace-period
  * mechanism.  The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_list.
+ * concurrent insertions into the ftrace_global_list.
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */
-static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_global_list_func(unsigned long ip,
+				    unsigned long parent_ip)
 {
-	struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
+	struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/
 
 	while (op != &ftrace_list_end) {
 		op->func(ip, parent_ip);
@@ -147,46 +159,69 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
 }
 #endif
 
-static int __register_ftrace_function(struct ftrace_ops *ops)
+static void update_global_ops(void)
 {
-	ops->next = ftrace_list;
+	ftrace_func_t func;
+
 	/*
-	 * We are entering ops into the ftrace_list but another
-	 * CPU might be walking that list. We need to make sure
-	 * the ops->next pointer is valid before another CPU sees
-	 * the ops pointer included into the ftrace_list.
+	 * If there's only one function registered, then call that
+	 * function directly. Otherwise, we need to iterate over the
+	 * registered callers.
 	 */
-	rcu_assign_pointer(ftrace_list, ops);
+	if (ftrace_global_list == &ftrace_list_end ||
+	    ftrace_global_list->next == &ftrace_list_end)
+		func = ftrace_global_list->func;
+	else
+		func = ftrace_global_list_func;
 
-	if (ftrace_enabled) {
-		ftrace_func_t func;
+	/* If we filter on pids, update to use the pid function */
+	if (!list_empty(&ftrace_pids)) {
+		set_ftrace_pid_function(func);
+		func = ftrace_pid_func;
+	}
 
-		if (ops->next == &ftrace_list_end)
-			func = ops->func;
-		else
-			func = ftrace_list_func;
+	global_ops.func = func;
+}
 
-		if (!list_empty(&ftrace_pids)) {
-			set_ftrace_pid_function(func);
-			func = ftrace_pid_func;
-		}
+static void update_ftrace_function(void)
+{
+	ftrace_func_t func;
+
+	update_global_ops();
+
+	/*
+	 * If we are at the end of the list and this ops is
+	 * not dynamic, then have the mcount trampoline call
+	 * the function directly
+	 */
+	if (ftrace_ops_list == &ftrace_list_end ||
+	    (ftrace_ops_list->next == &ftrace_list_end &&
+	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC)))
+		func = ftrace_ops_list->func;
+	else
+		func = ftrace_ops_list_func;
 
-		/*
-		 * For one func, simply call it directly.
-		 * For more than one func, call the chain.
-		 */
 #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-		ftrace_trace_function = func;
+	ftrace_trace_function = func;
 #else
-		__ftrace_trace_function = func;
-		ftrace_trace_function = ftrace_test_stop_func;
+	__ftrace_trace_function = func;
+	ftrace_trace_function = ftrace_test_stop_func;
 #endif
-	}
+}
 
-	return 0;
+static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+{
+	ops->next = *list;
+	/*
+	 * We are entering ops into the list but another
+	 * CPU might be walking that list. We need to make sure
+	 * the ops->next pointer is valid before another CPU sees
+	 * the ops pointer included into the list.
+	 */
+	rcu_assign_pointer(*list, ops);
 }
 
-static int __unregister_ftrace_function(struct ftrace_ops *ops)
+static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 {
 	struct ftrace_ops **p;
 
@@ -194,13 +229,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	 * If we are removing the last function, then simply point
 	 * to the ftrace_stub.
 	 */
-	if (ftrace_list == ops && ops->next == &ftrace_list_end) {
-		ftrace_trace_function = ftrace_stub;
-		ftrace_list = &ftrace_list_end;
+	if (*list == ops && ops->next == &ftrace_list_end) {
+		*list = &ftrace_list_end;
 		return 0;
 	}
 
-	for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
+	for (p = list; *p != &ftrace_list_end; p = &(*p)->next)
 		if (*p == ops)
 			break;
 
@@ -208,53 +242,83 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 		return -1;
 
 	*p = (*p)->next;
+	return 0;
+}
 
-	if (ftrace_enabled) {
-		/* If we only have one func left, then call that directly */
-		if (ftrace_list->next == &ftrace_list_end) {
-			ftrace_func_t func = ftrace_list->func;
+static int __register_ftrace_function(struct ftrace_ops *ops)
+{
+	if (ftrace_disabled)
+		return -ENODEV;
 
-			if (!list_empty(&ftrace_pids)) {
-				set_ftrace_pid_function(func);
-				func = ftrace_pid_func;
-			}
-#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-			ftrace_trace_function = func;
-#else
-			__ftrace_trace_function = func;
-#endif
-		}
-	}
+	if (FTRACE_WARN_ON(ops == &global_ops))
+		return -EINVAL;
+
+	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EBUSY;
+
+	if (!core_kernel_data((unsigned long)ops))
+		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
+
+	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
+		int first = ftrace_global_list == &ftrace_list_end;
+		add_ftrace_ops(&ftrace_global_list, ops);
+		ops->flags |= FTRACE_OPS_FL_ENABLED;
+		if (first)
+			add_ftrace_ops(&ftrace_ops_list, &global_ops);
+	} else
+		add_ftrace_ops(&ftrace_ops_list, ops);
+
+	if (ftrace_enabled)
+		update_ftrace_function();
 
 	return 0;
 }
 
-static void ftrace_update_pid_func(void)
+static int __unregister_ftrace_function(struct ftrace_ops *ops)
 {
-	ftrace_func_t func;
+	int ret;
 
-	if (ftrace_trace_function == ftrace_stub)
-		return;
+	if (ftrace_disabled)
+		return -ENODEV;
 
-#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	func = ftrace_trace_function;
-#else
-	func = __ftrace_trace_function;
-#endif
+	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
+		return -EBUSY;
 
-	if (!list_empty(&ftrace_pids)) {
-		set_ftrace_pid_function(func);
-		func = ftrace_pid_func;
-	} else {
-		if (func == ftrace_pid_func)
-			func = ftrace_pid_function;
-	}
+	if (FTRACE_WARN_ON(ops == &global_ops))
+		return -EINVAL;
 
-#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	ftrace_trace_function = func;
-#else
-	__ftrace_trace_function = func;
-#endif
+	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
+		ret = remove_ftrace_ops(&ftrace_global_list, ops);
+		if (!ret && ftrace_global_list == &ftrace_list_end)
+			ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
+		if (!ret)
+			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+	} else
+		ret = remove_ftrace_ops(&ftrace_ops_list, ops);
+
+	if (ret < 0)
+		return ret;
+
+	if (ftrace_enabled)
+		update_ftrace_function();
+
+	/*
+	 * Dynamic ops may be freed, we must make sure that all
+	 * callers are done before leaving this function.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
+		synchronize_sched();
+
+	return 0;
+}
+
+static void ftrace_update_pid_func(void)
+{
+	/* Only do something if we are tracing something */
+	if (ftrace_trace_function == ftrace_stub)
+		return;
+
+	update_ftrace_function();
 }
 
 #ifdef CONFIG_FUNCTION_PROFILER
@@ -888,8 +952,35 @@ enum {
 	FTRACE_START_FUNC_RET		= (1 << 3),
 	FTRACE_STOP_FUNC_RET		= (1 << 4),
 };
+struct ftrace_func_entry {
+	struct hlist_node hlist;
+	unsigned long ip;
+};
 
-static int ftrace_filtered;
+struct ftrace_hash {
+	unsigned long		size_bits;
+	struct hlist_head	*buckets;
+	unsigned long		count;
+	struct rcu_head		rcu;
+};
+
+/*
+ * We make these constant because no one should touch them,
+ * but they are used as the default "empty hash", to avoid allocating
+ * it all the time. These are in a read only section such that if
+ * anyone does try to modify it, it will cause an exception.
+ */
+static const struct hlist_head empty_buckets[1];
+static const struct ftrace_hash empty_hash = {
+	.buckets = (struct hlist_head *)empty_buckets,
+};
+#define EMPTY_HASH	((struct ftrace_hash *)&empty_hash)
+
+static struct ftrace_ops global_ops = {
+	.func			= ftrace_stub,
+	.notrace_hash		= EMPTY_HASH,
+	.filter_hash		= EMPTY_HASH,
+};
 
 static struct dyn_ftrace *ftrace_new_addrs;
 
@@ -912,6 +1003,269 @@ static struct ftrace_page	*ftrace_pages;
 
 static struct dyn_ftrace *ftrace_free_records;
 
+static struct ftrace_func_entry *
+ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
+{
+	unsigned long key;
+	struct ftrace_func_entry *entry;
+	struct hlist_head *hhd;
+	struct hlist_node *n;
+
+	if (!hash->count)
+		return NULL;
+
+	if (hash->size_bits > 0)
+		key = hash_long(ip, hash->size_bits);
+	else
+		key = 0;
+
+	hhd = &hash->buckets[key];
+
+	hlist_for_each_entry_rcu(entry, n, hhd, hlist) {
+		if (entry->ip == ip)
+			return entry;
+	}
+	return NULL;
+}
+
+static void __add_hash_entry(struct ftrace_hash *hash,
+			     struct ftrace_func_entry *entry)
+{
+	struct hlist_head *hhd;
+	unsigned long key;
+
+	if (hash->size_bits)
+		key = hash_long(entry->ip, hash->size_bits);
+	else
+		key = 0;
+
+	hhd = &hash->buckets[key];
+	hlist_add_head(&entry->hlist, hhd);
+	hash->count++;
+}
+
+static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
+{
+	struct ftrace_func_entry *entry;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->ip = ip;
+	__add_hash_entry(hash, entry);
+
+	return 0;
+}
+
+static void
+free_hash_entry(struct ftrace_hash *hash,
+		  struct ftrace_func_entry *entry)
+{
+	hlist_del(&entry->hlist);
+	kfree(entry);
+	hash->count--;
+}
+
+static void
+remove_hash_entry(struct ftrace_hash *hash,
+		  struct ftrace_func_entry *entry)
+{
+	hlist_del(&entry->hlist);
+	hash->count--;
+}
+
+static void ftrace_hash_clear(struct ftrace_hash *hash)
+{
+	struct hlist_head *hhd;
+	struct hlist_node *tp, *tn;
+	struct ftrace_func_entry *entry;
+	int size = 1 << hash->size_bits;
+	int i;
+
+	if (!hash->count)
+		return;
+
+	for (i = 0; i < size; i++) {
+		hhd = &hash->buckets[i];
+		hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist)
+			free_hash_entry(hash, entry);
+	}
+	FTRACE_WARN_ON(hash->count);
+}
+
+static void free_ftrace_hash(struct ftrace_hash *hash)
+{
+	if (!hash || hash == EMPTY_HASH)
+		return;
+	ftrace_hash_clear(hash);
+	kfree(hash->buckets);
+	kfree(hash);
+}
+
+static void __free_ftrace_hash_rcu(struct rcu_head *rcu)
+{
+	struct ftrace_hash *hash;
+
+	hash = container_of(rcu, struct ftrace_hash, rcu);
+	free_ftrace_hash(hash);
+}
+
+static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
+{
+	if (!hash || hash == EMPTY_HASH)
+		return;
+	call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
+}
+
+/* Allocate a zeroed hash with 1 << size_bits buckets; NULL on failure. */
+static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
+{
+	struct ftrace_hash *hash;
+	int size;
+
+	hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+	if (!hash)
+		return NULL;
+
+	size = 1 << size_bits;
+	/* kcalloc() zeroes the buckets and checks size * elem for overflow */
+	hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL);
+	if (!hash->buckets) {
+		kfree(hash);
+		return NULL;
+	}
+
+	hash->size_bits = size_bits;
+	return hash;
+}
+
+static struct ftrace_hash *
+alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
+{
+	struct ftrace_func_entry *entry;
+	struct ftrace_hash *new_hash;
+	struct hlist_node *tp;
+	int size;
+	int ret;
+	int i;
+
+	new_hash = alloc_ftrace_hash(size_bits);
+	if (!new_hash)
+		return NULL;
+
+	/* Empty hash? */
+	if (!hash || !hash->count)
+		return new_hash;
+
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) {
+			ret = add_hash_entry(new_hash, entry->ip);
+			if (ret < 0)
+				goto free_hash;
+		}
+	}
+
+	FTRACE_WARN_ON(new_hash->count != hash->count);
+
+	return new_hash;
+
+ free_hash:
+	free_ftrace_hash(new_hash);
+	return NULL;
+}
+
+/*
+ * Move all entries of @src into a new hash sized for them (EMPTY_HASH if
+ * @src is empty), publish it in *@dst and free the old *@dst via RCU.
+ * Returns 0 on success, -ENOMEM if the new hash cannot be allocated.
+ */
+static int
+ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
+{
+	struct ftrace_func_entry *entry;
+	struct hlist_node *tp, *tn;
+	struct hlist_head *hhd;
+	struct ftrace_hash *old_hash;
+	struct ftrace_hash *new_hash;
+	int size = src->count;
+	int bits = 0;
+	int i;
+
+	/*
+	 * If the new source is empty, just free dst and assign it
+	 * the empty_hash.
+	 */
+	if (!src->count) {
+		free_ftrace_hash_rcu(*dst);
+		rcu_assign_pointer(*dst, EMPTY_HASH);
+		return 0;
+	}
+
+	/*
+	 * Make the hash size about 1/2 the # found
+	 */
+	for (size /= 2; size; size >>= 1)
+		bits++;
+
+	/* Don't allocate too much */
+	if (bits > FTRACE_HASH_MAX_BITS)
+		bits = FTRACE_HASH_MAX_BITS;
+
+	new_hash = alloc_ftrace_hash(bits);
+	if (!new_hash)
+		return -ENOMEM;
+
+	size = 1 << src->size_bits;
+	for (i = 0; i < size; i++) {
+		hhd = &src->buckets[i];
+		hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) {
+			remove_hash_entry(src, entry);
+			__add_hash_entry(new_hash, entry);
+		}
+	}
+
+	old_hash = *dst;
+	rcu_assign_pointer(*dst, new_hash);
+	free_ftrace_hash_rcu(old_hash);
+
+	return 0;
+}
+
+/*
+ * Test the hashes for this ops to see if we want to call
+ * the ops->func or not.
+ *
+ * It's a match if the ip is in the ops->filter_hash or
+ * the filter_hash does not exist or is empty,
+ *  AND
+ * the ip is not in the ops->notrace_hash.
+ *
+ * This needs to be called with preemption disabled as
+ * the hashes are freed with call_rcu_sched().
+ */
+static int
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+{
+	struct ftrace_hash *filter_hash;
+	struct ftrace_hash *notrace_hash;
+	int ret;
+
+	filter_hash = rcu_dereference_raw(ops->filter_hash);
+	notrace_hash = rcu_dereference_raw(ops->notrace_hash);
+
+	if ((!filter_hash || !filter_hash->count ||
+	     ftrace_lookup_ip(filter_hash, ip)) &&
+	    (!notrace_hash || !notrace_hash->count ||
+	     !ftrace_lookup_ip(notrace_hash, ip)))
+		ret = 1;
+	else
+		ret = 0;
+
+	return ret;
+}
+
 /*
  * This is a double for. Do not use 'break' to break out of the loop,
  * you must use a goto.
@@ -926,6 +1280,105 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
+/* Inc/dec the count in rec->flags of each record the ops' hash matches. */
+static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
+				     int filter_hash,
+				     bool inc)
+{
+	struct ftrace_hash *hash;
+	struct ftrace_hash *other_hash;
+	struct ftrace_page *pg;
+	struct dyn_ftrace *rec;
+	int count = 0;
+	int all = 0;
+
+	/* Only update if the ops has been registered */
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return;
+
+	/*
+	 * In the filter_hash case:
+	 *   If the count is zero, we update all records.
+	 *   Otherwise we just update the items in the hash.
+	 *
+	 * In the notrace_hash case:
+	 *   We enable the update in the hash.
+	 *   As disabling notrace means enabling the tracing,
+	 *   and enabling notrace means disabling, the inc variable
+	 *   gets inverted.
+	 */
+	if (filter_hash) {
+		hash = ops->filter_hash;
+		other_hash = ops->notrace_hash;
+		if (!hash || !hash->count)
+			all = 1;
+	} else {
+		inc = !inc;
+		hash = ops->notrace_hash;
+		other_hash = ops->filter_hash;
+		/*
+		 * If the notrace hash has no items,
+		 * then there's nothing to do.
+		 */
+		if (hash && !hash->count)
+			return;
+	}
+
+	do_for_each_ftrace_rec(pg, rec) {
+		int in_other_hash = 0;
+		int in_hash = 0;
+		int match = 0;
+
+		if (all) {
+			/*
+			 * Only the filter_hash affects all records.
+			 * Update if the record is not in the notrace hash.
+			 */
+			if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
+				match = 1;
+		} else {
+			in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
+			in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+			/*
+			 * Match ips in hash that other_hash (NULL == empty) allows.
+			 */
+			if (filter_hash && in_hash && !in_other_hash)
+				match = 1;
+			else if (!filter_hash && in_hash &&
+				 (in_other_hash || !other_hash || !other_hash->count))
+				match = 1;
+		}
+		if (!match)
+			continue;
+
+		if (inc) {
+			rec->flags++;
+			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
+				return;
+		} else {
+			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
+				return;
+			rec->flags--;
+		}
+		count++;
+		/* Shortcut, if we handled all records, we are done. */
+		if (!all && count == hash->count)
+			return;
+	} while_for_each_ftrace_rec();
+}
+
+static void ftrace_hash_rec_disable(struct ftrace_ops *ops,
+				    int filter_hash)
+{
+	__ftrace_hash_rec_update(ops, filter_hash, 0);
+}
+
+static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
+				   int filter_hash)
+{
+	__ftrace_hash_rec_update(ops, filter_hash, 1);
+}
+
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
 	rec->freelist = ftrace_free_records;
@@ -1047,18 +1500,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	ftrace_addr = (unsigned long)FTRACE_ADDR;
 
 	/*
-	 * If this record is not to be traced or we want to disable it,
-	 * then disable it.
+	 * If we are enabling tracing:
+	 *
+	 *   If the record has a ref count, then we need to enable it
+	 *   because someone is using it.
 	 *
-	 * If we want to enable it and filtering is off, then enable it.
+	 *   Otherwise we make sure its disabled.
 	 *
-	 * If we want to enable it and filtering is on, enable it only if
-	 * it's filtered
+	 * If we are disabling tracing, then disable all records that
+	 * are enabled.
 	 */
-	if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) {
-		if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER))
-			flag = FTRACE_FL_ENABLED;
-	}
+	if (enable && (rec->flags & ~FTRACE_FL_MASK))
+		flag = FTRACE_FL_ENABLED;
 
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
@@ -1079,19 +1532,16 @@ static void ftrace_replace_code(int enable)
 	struct ftrace_page *pg;
 	int failed;
 
+	if (unlikely(ftrace_disabled))
+		return;
+
 	do_for_each_ftrace_rec(pg, rec) {
-		/*
-		 * Skip over free records, records that have
-		 * failed and not converted.
-		 */
-		if (rec->flags & FTRACE_FL_FREE ||
-		    rec->flags & FTRACE_FL_FAILED ||
-		    !(rec->flags & FTRACE_FL_CONVERTED))
+		/* Skip over free records */
+		if (rec->flags & FTRACE_FL_FREE)
 			continue;
 
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
-			rec->flags |= FTRACE_FL_FAILED;
 			ftrace_bug(failed, rec->ip);
 			/* Stop processing */
 			return;
@@ -1107,10 +1557,12 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 
 	ip = rec->ip;
 
+	if (unlikely(ftrace_disabled))
+		return 0;
+
 	ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
 	if (ret) {
 		ftrace_bug(ret, ip);
-		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
 	return 1;
@@ -1171,6 +1623,7 @@ static void ftrace_run_update_code(int command)
 
 static ftrace_func_t saved_ftrace_func;
 static int ftrace_start_up;
+static int global_start_up;
 
 static void ftrace_startup_enable(int command)
 {
@@ -1185,19 +1638,36 @@ static void ftrace_startup_enable(int command)
 	ftrace_run_update_code(command);
 }
 
-static void ftrace_startup(int command)
+static void ftrace_startup(struct ftrace_ops *ops, int command)
 {
+	bool hash_enable = true;
+
 	if (unlikely(ftrace_disabled))
 		return;
 
 	ftrace_start_up++;
 	command |= FTRACE_ENABLE_CALLS;
 
+	/* ops marked global share the filter hashes */
+	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
+		ops = &global_ops;
+		/* Don't update hash if global is already set */
+		if (global_start_up)
+			hash_enable = false;
+		global_start_up++;
+	}
+
+	ops->flags |= FTRACE_OPS_FL_ENABLED;
+	if (hash_enable)
+		ftrace_hash_rec_enable(ops, 1);
+
 	ftrace_startup_enable(command);
 }
 
-static void ftrace_shutdown(int command)
+static void ftrace_shutdown(struct ftrace_ops *ops, int command)
 {
+	bool hash_disable = true;
+
 	if (unlikely(ftrace_disabled))
 		return;
 
@@ -1209,6 +1679,23 @@ static void ftrace_shutdown(int command)
 	 */
 	WARN_ON_ONCE(ftrace_start_up < 0);
 
+	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
+		ops = &global_ops;
+		global_start_up--;
+		WARN_ON_ONCE(global_start_up < 0);
+		/* Don't update hash if global still has users */
+		if (global_start_up) {
+			WARN_ON_ONCE(!ftrace_start_up);
+			hash_disable = false;
+		}
+	}
+
+	if (hash_disable)
+		ftrace_hash_rec_disable(ops, 1);
+
+	if (ops != &global_ops || !global_start_up)
+		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+
 	if (!ftrace_start_up)
 		command |= FTRACE_DISABLE_CALLS;
 
@@ -1273,10 +1760,10 @@ static int ftrace_update_code(struct module *mod)
 		 */
 		if (!ftrace_code_disable(mod, p)) {
 			ftrace_free_rec(p);
-			continue;
+			/* Game over */
+			break;
 		}
 
-		p->flags |= FTRACE_FL_CONVERTED;
 		ftrace_update_cnt++;
 
 		/*
@@ -1351,9 +1838,9 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 enum {
 	FTRACE_ITER_FILTER	= (1 << 0),
 	FTRACE_ITER_NOTRACE	= (1 << 1),
-	FTRACE_ITER_FAILURES	= (1 << 2),
-	FTRACE_ITER_PRINTALL	= (1 << 3),
-	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_HASH	= (1 << 3),
+	FTRACE_ITER_ENABLED	= (1 << 4),
 };
 
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
@@ -1365,6 +1852,8 @@ struct ftrace_iterator {
 	struct dyn_ftrace		*func;
 	struct ftrace_func_probe	*probe;
 	struct trace_parser		parser;
+	struct ftrace_hash		*hash;
+	struct ftrace_ops		*ops;
 	int				hidx;
 	int				idx;
 	unsigned			flags;
@@ -1461,8 +1950,12 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
+	struct ftrace_ops *ops = &global_ops;
 	struct dyn_ftrace *rec = NULL;
 
+	if (unlikely(ftrace_disabled))
+		return NULL;
+
 	if (iter->flags & FTRACE_ITER_HASH)
 		return t_hash_next(m, pos);
 
@@ -1483,17 +1976,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		rec = &iter->pg->records[iter->idx++];
 		if ((rec->flags & FTRACE_FL_FREE) ||
 
-		    (!(iter->flags & FTRACE_ITER_FAILURES) &&
-		     (rec->flags & FTRACE_FL_FAILED)) ||
-
-		    ((iter->flags & FTRACE_ITER_FAILURES) &&
-		     !(rec->flags & FTRACE_FL_FAILED)) ||
-
 		    ((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(rec->flags & FTRACE_FL_FILTER)) ||
+		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
-		     !(rec->flags & FTRACE_FL_NOTRACE))) {
+		     !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) ||
+
+		    ((iter->flags & FTRACE_ITER_ENABLED) &&
+		     !(rec->flags & ~FTRACE_FL_MASK))) {
+
 			rec = NULL;
 			goto retry;
 		}
@@ -1517,10 +2008,15 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
+	struct ftrace_ops *ops = &global_ops;
 	void *p = NULL;
 	loff_t l;
 
 	mutex_lock(&ftrace_lock);
+
+	if (unlikely(ftrace_disabled))
+		return NULL;
+
 	/*
 	 * If an lseek was done, then reset and start from beginning.
 	 */
@@ -1532,7 +2028,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	 * off, we can short cut and just print out that all
 	 * functions are enabled.
 	 */
-	if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
+	if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
 		if (*pos > 0)
 			return t_hash_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
@@ -1590,7 +2086,11 @@ static int t_show(struct seq_file *m, void *v)
 	if (!rec)
 		return 0;
 
-	seq_printf(m, "%ps\n", (void *)rec->ip);
+	seq_printf(m, "%ps", (void *)rec->ip);
+	if (iter->flags & FTRACE_ITER_ENABLED)
+		seq_printf(m, " (%ld)",
+			   rec->flags & ~FTRACE_FL_MASK);
+	seq_printf(m, "\n");
 
 	return 0;
 }
@@ -1630,44 +2130,46 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 }
 
 static int
-ftrace_failures_open(struct inode *inode, struct file *file)
+ftrace_enabled_open(struct inode *inode, struct file *file)
 {
-	int ret;
-	struct seq_file *m;
 	struct ftrace_iterator *iter;
+	int ret;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
+	iter->pg = ftrace_pages_start;
+	iter->flags = FTRACE_ITER_ENABLED;
 
-	ret = ftrace_avail_open(inode, file);
+	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
-		m = file->private_data;
-		iter = m->private;
-		iter->flags = FTRACE_ITER_FAILURES;
+		struct seq_file *m = file->private_data;
+
+		m->private = iter;
+	} else {
+		kfree(iter);
 	}
 
 	return ret;
 }
 
-
-static void ftrace_filter_reset(int enable)
+static void ftrace_filter_reset(struct ftrace_hash *hash)
 {
-	struct ftrace_page *pg;
-	struct dyn_ftrace *rec;
-	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-
 	mutex_lock(&ftrace_lock);
-	if (enable)
-		ftrace_filtered = 0;
-	do_for_each_ftrace_rec(pg, rec) {
-		if (rec->flags & FTRACE_FL_FAILED)
-			continue;
-		rec->flags &= ~type;
-	} while_for_each_ftrace_rec();
+	ftrace_hash_clear(hash);
 	mutex_unlock(&ftrace_lock);
 }
 
 static int
-ftrace_regex_open(struct inode *inode, struct file *file, int enable)
+ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file)
 {
 	struct ftrace_iterator *iter;
+	struct ftrace_hash *hash;
 	int ret = 0;
 
 	if (unlikely(ftrace_disabled))
@@ -1682,21 +2184,42 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 		return -ENOMEM;
 	}
 
+	if (flag & FTRACE_ITER_NOTRACE)
+		hash = ops->notrace_hash;
+	else
+		hash = ops->filter_hash;
+
+	iter->ops = ops;
+	iter->flags = flag;
+
+	if (file->f_mode & FMODE_WRITE) {
+		mutex_lock(&ftrace_lock);
+		iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash);
+		mutex_unlock(&ftrace_lock);
+
+		if (!iter->hash) {
+			trace_parser_put(&iter->parser);
+			kfree(iter);
+			return -ENOMEM;
+		}
+	}
+
 	mutex_lock(&ftrace_regex_lock);
+
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
-		ftrace_filter_reset(enable);
+		ftrace_filter_reset(iter->hash);
 
 	if (file->f_mode & FMODE_READ) {
 		iter->pg = ftrace_pages_start;
-		iter->flags = enable ? FTRACE_ITER_FILTER :
-			FTRACE_ITER_NOTRACE;
 
 		ret = seq_open(file, &show_ftrace_seq_ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = iter;
 		} else {
+			/* Failed */
+			free_ftrace_hash(iter->hash);
 			trace_parser_put(&iter->parser);
 			kfree(iter);
 		}
@@ -1710,13 +2233,15 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
 static int
 ftrace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(inode, file, 1);
+	return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
+				 inode, file);
 }
 
 static int
 ftrace_notrace_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(inode, file, 0);
+	return ftrace_regex_open(&global_ops, FTRACE_ITER_NOTRACE,
+				 inode, file);
 }
 
 static loff_t
@@ -1761,86 +2286,99 @@ static int ftrace_match(char *str, char *regex, int len, int type)
 }
 
 static int
-ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
+enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not)
+{
+	struct ftrace_func_entry *entry;
+	int ret = 0;
+
+	entry = ftrace_lookup_ip(hash, rec->ip);
+	if (not) {
+		/* Do nothing if it doesn't exist */
+		if (!entry)
+			return 0;
+
+		free_hash_entry(hash, entry);
+	} else {
+		/* Do nothing if it exists */
+		if (entry)
+			return 0;
+
+		ret = add_hash_entry(hash, rec->ip);
+	}
+	return ret;
+}
+
+static int
+ftrace_match_record(struct dyn_ftrace *rec, char *mod,
+		    char *regex, int len, int type)
 {
 	char str[KSYM_SYMBOL_LEN];
+	char *modname;
+
+	kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
+
+	if (mod) {
+		/* module lookup requires matching the module */
+		if (!modname || strcmp(modname, mod))
+			return 0;
+
+		/* blank search means to match all funcs in the mod */
+		if (!len)
+			return 1;
+	}
 
-	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
 	return ftrace_match(str, regex, len, type);
 }
 
-static int ftrace_match_records(char *buff, int len, int enable)
+static int
+match_records(struct ftrace_hash *hash, char *buff,
+	      int len, char *mod, int not)
 {
-	unsigned int search_len;
+	unsigned search_len = 0;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
-	unsigned long flag;
-	char *search;
-	int type;
-	int not;
+	int type = MATCH_FULL;
+	char *search = buff;
 	int found = 0;
+	int ret;
 
-	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-	type = filter_parse_regex(buff, len, &search, &not);
-
-	search_len = strlen(search);
+	if (len) {
+		type = filter_parse_regex(buff, len, &search, &not);
+		search_len = strlen(search);
+	}
 
 	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & FTRACE_FL_FAILED)
-			continue;
+	if (unlikely(ftrace_disabled))
+		goto out_unlock;
 
-		if (ftrace_match_record(rec, search, search_len, type)) {
-			if (not)
-				rec->flags &= ~flag;
-			else
-				rec->flags |= flag;
+	do_for_each_ftrace_rec(pg, rec) {
+
+		if (ftrace_match_record(rec, mod, search, search_len, type)) {
+			ret = enter_record(hash, rec, not);
+			if (ret < 0) {
+				found = ret;
+				goto out_unlock;
+			}
 			found = 1;
 		}
-		/*
-		 * Only enable filtering if we have a function that
-		 * is filtered on.
-		 */
-		if (enable && (rec->flags & FTRACE_FL_FILTER))
-			ftrace_filtered = 1;
 	} while_for_each_ftrace_rec();
+ out_unlock:
 	mutex_unlock(&ftrace_lock);
 
 	return found;
 }
 
 static int
-ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
-			   char *regex, int len, int type)
+ftrace_match_records(struct ftrace_hash *hash, char *buff, int len)
 {
-	char str[KSYM_SYMBOL_LEN];
-	char *modname;
-
-	kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
-
-	if (!modname || strcmp(modname, mod))
-		return 0;
-
-	/* blank search means to match all funcs in the mod */
-	if (len)
-		return ftrace_match(str, regex, len, type);
-	else
-		return 1;
+	return match_records(hash, buff, len, NULL, 0);
 }
 
-static int ftrace_match_module_records(char *buff, char *mod, int enable)
+static int
+ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
 {
-	unsigned search_len = 0;
-	struct ftrace_page *pg;
-	struct dyn_ftrace *rec;
-	int type = MATCH_FULL;
-	char *search = buff;
-	unsigned long flag;
 	int not = 0;
-	int found = 0;
-
-	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 
 	/* blank or '*' mean the same */
 	if (strcmp(buff, "*") == 0)
@@ -1852,32 +2390,7 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)
 		not = 1;
 	}
 
-	if (strlen(buff)) {
-		type = filter_parse_regex(buff, strlen(buff), &search, &not);
-		search_len = strlen(search);
-	}
-
-	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-
-		if (rec->flags & FTRACE_FL_FAILED)
-			continue;
-
-		if (ftrace_match_module_record(rec, mod,
-					       search, search_len, type)) {
-			if (not)
-				rec->flags &= ~flag;
-			else
-				rec->flags |= flag;
-			found = 1;
-		}
-		if (enable && (rec->flags & FTRACE_FL_FILTER))
-			ftrace_filtered = 1;
-
-	} while_for_each_ftrace_rec();
-	mutex_unlock(&ftrace_lock);
-
-	return found;
+	return match_records(hash, buff, strlen(buff), mod, not);
 }
 
 /*
@@ -1888,7 +2401,10 @@ static int ftrace_match_module_records(char *buff, char *mod, int enable)
 static int
 ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 {
+	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_hash *hash;
 	char *mod;
+	int ret = -EINVAL;
 
 	/*
 	 * cmd == 'mod' because we only registered this func
@@ -1900,15 +2416,24 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 
 	/* we must have a module name */
 	if (!param)
-		return -EINVAL;
+		return ret;
 
 	mod = strsep(&param, ":");
 	if (!strlen(mod))
-		return -EINVAL;
+		return ret;
 
-	if (ftrace_match_module_records(func, mod, enable))
-		return 0;
-	return -EINVAL;
+	if (enable)
+		hash = ops->filter_hash;
+	else
+		hash = ops->notrace_hash;
+
+	ret = ftrace_match_module_records(hash, func, mod);
+	if (!ret)
+		ret = -EINVAL;
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1959,6 +2484,7 @@ static int ftrace_probe_registered;
 
 static void __enable_ftrace_function_probe(void)
 {
+	int ret;
 	int i;
 
 	if (ftrace_probe_registered)
@@ -1973,13 +2499,16 @@ static void __enable_ftrace_function_probe(void)
 	if (i == FTRACE_FUNC_HASHSIZE)
 		return;
 
-	__register_ftrace_function(&trace_probe_ops);
-	ftrace_startup(0);
+	ret = __register_ftrace_function(&trace_probe_ops);
+	if (!ret)
+		ftrace_startup(&trace_probe_ops, 0);
+
 	ftrace_probe_registered = 1;
 }
 
 static void __disable_ftrace_function_probe(void)
 {
+	int ret;
 	int i;
 
 	if (!ftrace_probe_registered)
@@ -1992,8 +2521,10 @@ static void __disable_ftrace_function_probe(void)
 	}
 
 	/* no more funcs left */
-	__unregister_ftrace_function(&trace_probe_ops);
-	ftrace_shutdown(0);
+	ret = __unregister_ftrace_function(&trace_probe_ops);
+	if (!ret)
+		ftrace_shutdown(&trace_probe_ops, 0);
+
 	ftrace_probe_registered = 0;
 }
 
@@ -2029,12 +2560,13 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 		return -EINVAL;
 
 	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & FTRACE_FL_FAILED)
-			continue;
+	if (unlikely(ftrace_disabled))
+		goto out_unlock;
+
+	do_for_each_ftrace_rec(pg, rec) {
 
-		if (!ftrace_match_record(rec, search, len, type))
+		if (!ftrace_match_record(rec, NULL, search, len, type))
 			continue;
 
 		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
@@ -2195,18 +2727,22 @@ int unregister_ftrace_command(struct ftrace_func_command *cmd)
 	return ret;
 }
 
-static int ftrace_process_regex(char *buff, int len, int enable)
+static int ftrace_process_regex(struct ftrace_hash *hash,
+				char *buff, int len, int enable)
 {
 	char *func, *command, *next = buff;
 	struct ftrace_func_command *p;
-	int ret = -EINVAL;
+	int ret;
 
 	func = strsep(&next, ":");
 
 	if (!next) {
-		if (ftrace_match_records(func, len, enable))
-			return 0;
-		return ret;
+		ret = ftrace_match_records(hash, func, len);
+		if (!ret)
+			ret = -EINVAL;
+		if (ret < 0)
+			return ret;
+		return 0;
 	}
 
 	/* command found */
@@ -2239,6 +2775,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 
 	mutex_lock(&ftrace_regex_lock);
 
+	ret = -ENODEV;
+	if (unlikely(ftrace_disabled))
+		goto out_unlock;
+
 	if (file->f_mode & FMODE_READ) {
 		struct seq_file *m = file->private_data;
 		iter = m->private;
@@ -2250,7 +2790,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 
 	if (read >= 0 && trace_parser_loaded(parser) &&
 	    !trace_parser_cont(parser)) {
-		ret = ftrace_process_regex(parser->buffer,
+		ret = ftrace_process_regex(iter->hash, parser->buffer,
 					   parser->idx, enable);
 		trace_parser_clear(parser);
 		if (ret)
@@ -2278,22 +2818,49 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
 	return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
 }
 
-static void
-ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
+static int
+ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
+		 int reset, int enable)
 {
+	struct ftrace_hash **orig_hash;
+	struct ftrace_hash *hash;
+	int ret;
+
+	/* All global ops use the global ops filters */
+	if (ops->flags & FTRACE_OPS_FL_GLOBAL)
+		ops = &global_ops;
+
 	if (unlikely(ftrace_disabled))
-		return;
+		return -ENODEV;
+
+	if (enable)
+		orig_hash = &ops->filter_hash;
+	else
+		orig_hash = &ops->notrace_hash;
+
+	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+	if (!hash)
+		return -ENOMEM;
 
 	mutex_lock(&ftrace_regex_lock);
 	if (reset)
-		ftrace_filter_reset(enable);
+		ftrace_filter_reset(hash);
 	if (buf)
-		ftrace_match_records(buf, len, enable);
+		ftrace_match_records(hash, buf, len);
+
+	mutex_lock(&ftrace_lock);
+	ret = ftrace_hash_move(orig_hash, hash);
+	mutex_unlock(&ftrace_lock);
+
 	mutex_unlock(&ftrace_regex_lock);
+
+	free_ftrace_hash(hash);
+	return ret;
 }
 
 /**
  * ftrace_set_filter - set a function to filter on in ftrace
+ * @ops - the ops to set the filter with
  * @buf - the string that holds the function filter text.
  * @len - the length of the string.
  * @reset - non zero to reset all filters before applying this filter.
@@ -2301,13 +2868,16 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
  * Filters denote which functions should be enabled when tracing is enabled.
  * If @buf is NULL and reset is set, all functions will be enabled for tracing.
  */
-void ftrace_set_filter(unsigned char *buf, int len, int reset)
+void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
+		       int len, int reset)
 {
-	ftrace_set_regex(buf, len, reset, 1);
+	ftrace_set_regex(ops, buf, len, reset, 1);
 }
+EXPORT_SYMBOL_GPL(ftrace_set_filter);
 
 /**
  * ftrace_set_notrace - set a function to not trace in ftrace
+ * @ops - the ops to set the notrace filter with
  * @buf - the string that holds the function notrace text.
  * @len - the length of the string.
  * @reset - non zero to reset all filters before applying this filter.
@@ -2316,10 +2886,44 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset)
  * is enabled. If @buf is NULL and reset is set, all functions will be enabled
  * for tracing.
  */
-void ftrace_set_notrace(unsigned char *buf, int len, int reset)
+void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
+			int len, int reset)
 {
-	ftrace_set_regex(buf, len, reset, 0);
+	ftrace_set_regex(ops, buf, len, reset, 0);
 }
+EXPORT_SYMBOL_GPL(ftrace_set_notrace);
+/**
+ * ftrace_set_global_filter - set a function to filter on in ftrace,
+ * using the filter hash of global_ops shared by all global ops
+ * @buf - the string that holds the function filter text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled.
+ * If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ */
+void ftrace_set_global_filter(unsigned char *buf, int len, int reset)
+{
+	ftrace_set_regex(&global_ops, buf, len, reset, 1);
+}
+EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
+
+/**
+ * ftrace_set_global_notrace - set a function to not trace in ftrace,
+ * using the notrace hash of global_ops shared by all global ops
+ * @buf - the string that holds the function notrace text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Notrace Filters denote which functions should not be enabled when tracing
+ * is enabled. If @buf is NULL and reset is set, all functions will be enabled
+ * for tracing.
+ */
+void ftrace_set_global_notrace(unsigned char *buf, int len, int reset)
+{
+	ftrace_set_regex(&global_ops, buf, len, reset, 0);
+}
+EXPORT_SYMBOL_GPL(ftrace_set_global_notrace);
 
 /*
  * command line interface to allow users to set filters on boot up.
@@ -2370,22 +2974,23 @@ static void __init set_ftrace_early_graph(char *buf)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static void __init set_ftrace_early_filter(char *buf, int enable)
+static void __init
+set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
 	char *func;
 
 	while (buf) {
 		func = strsep(&buf, ",");
-		ftrace_set_regex(func, strlen(func), 0, enable);
+		ftrace_set_regex(ops, func, strlen(func), 0, enable);
 	}
 }
 
 static void __init set_ftrace_early_filters(void)
 {
 	if (ftrace_filter_buf[0])
-		set_ftrace_early_filter(ftrace_filter_buf, 1);
+		set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
-		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+		set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (ftrace_graph_buf[0])
 		set_ftrace_early_graph(ftrace_graph_buf);
@@ -2393,11 +2998,14 @@ static void __init set_ftrace_early_filters(void)
 }
 
 static int
-ftrace_regex_release(struct inode *inode, struct file *file, int enable)
+ftrace_regex_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct ftrace_iterator *iter;
+	struct ftrace_hash **orig_hash;
 	struct trace_parser *parser;
+	int filter_hash;
+	int ret;
 
 	mutex_lock(&ftrace_regex_lock);
 	if (file->f_mode & FMODE_READ) {
@@ -2410,33 +3018,41 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 	parser = &iter->parser;
 	if (trace_parser_loaded(parser)) {
 		parser->buffer[parser->idx] = 0;
-		ftrace_match_records(parser->buffer, parser->idx, enable);
+		ftrace_match_records(iter->hash, parser->buffer, parser->idx);
 	}
 
-	mutex_lock(&ftrace_lock);
-	if (ftrace_start_up && ftrace_enabled)
-		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
-	mutex_unlock(&ftrace_lock);
-
 	trace_parser_put(parser);
+
+	if (file->f_mode & FMODE_WRITE) {
+		filter_hash = !!(iter->flags & FTRACE_ITER_FILTER);
+
+		if (filter_hash)
+			orig_hash = &iter->ops->filter_hash;
+		else
+			orig_hash = &iter->ops->notrace_hash;
+
+		mutex_lock(&ftrace_lock);
+		/*
+		 * Remove the current set, update the hash and add
+		 * them back.
+		 */
+		ftrace_hash_rec_disable(iter->ops, filter_hash);
+		ret = ftrace_hash_move(orig_hash, iter->hash);
+		if (!ret) {
+			ftrace_hash_rec_enable(iter->ops, filter_hash);
+			if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
+			    && ftrace_enabled)
+				ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+		}
+		mutex_unlock(&ftrace_lock);
+	}
+	free_ftrace_hash(iter->hash);
 	kfree(iter);
 
 	mutex_unlock(&ftrace_regex_lock);
 	return 0;
 }
 
-static int
-ftrace_filter_release(struct inode *inode, struct file *file)
-{
-	return ftrace_regex_release(inode, file, 1);
-}
-
-static int
-ftrace_notrace_release(struct inode *inode, struct file *file)
-{
-	return ftrace_regex_release(inode, file, 0);
-}
-
 static const struct file_operations ftrace_avail_fops = {
 	.open = ftrace_avail_open,
 	.read = seq_read,
@@ -2444,8 +3060,8 @@ static const struct file_operations ftrace_avail_fops = {
 	.release = seq_release_private,
 };
 
-static const struct file_operations ftrace_failures_fops = {
-	.open = ftrace_failures_open,
+static const struct file_operations ftrace_enabled_fops = {
+	.open = ftrace_enabled_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
 	.release = seq_release_private,
@@ -2456,7 +3072,7 @@ static const struct file_operations ftrace_filter_fops = {
 	.read = seq_read,
 	.write = ftrace_filter_write,
 	.llseek = ftrace_regex_lseek,
-	.release = ftrace_filter_release,
+	.release = ftrace_regex_release,
 };
 
 static const struct file_operations ftrace_notrace_fops = {
@@ -2464,7 +3080,7 @@ static const struct file_operations ftrace_notrace_fops = {
 	.read = seq_read,
 	.write = ftrace_notrace_write,
 	.llseek = ftrace_regex_lseek,
-	.release = ftrace_notrace_release,
+	.release = ftrace_regex_release,
 };
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2573,9 +3189,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 	bool exists;
 	int i;
 
-	if (ftrace_disabled)
-		return -ENODEV;
-
 	/* decode regex */
 	type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
 	if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
@@ -2584,12 +3197,18 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 	search_len = strlen(search);
 
 	mutex_lock(&ftrace_lock);
+
+	if (unlikely(ftrace_disabled)) {
+		mutex_unlock(&ftrace_lock);
+		return -ENODEV;
+	}
+
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
+		if (rec->flags & FTRACE_FL_FREE)
 			continue;
 
-		if (ftrace_match_record(rec, search, search_len, type)) {
+		if (ftrace_match_record(rec, NULL, search, search_len, type)) {
 			/* if it is in the array */
 			exists = false;
 			for (i = 0; i < *idx; i++) {
@@ -2679,8 +3298,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	trace_create_file("available_filter_functions", 0444,
 			d_tracer, NULL, &ftrace_avail_fops);
 
-	trace_create_file("failures", 0444,
-			d_tracer, NULL, &ftrace_failures_fops);
+	trace_create_file("enabled_functions", 0444,
+			d_tracer, NULL, &ftrace_enabled_fops);
 
 	trace_create_file("set_ftrace_filter", 0644, d_tracer,
 			NULL, &ftrace_filter_fops);
@@ -2703,7 +3322,6 @@ static int ftrace_process_locs(struct module *mod,
 {
 	unsigned long *p;
 	unsigned long addr;
-	unsigned long flags;
 
 	mutex_lock(&ftrace_lock);
 	p = start;
@@ -2720,10 +3338,7 @@ static int ftrace_process_locs(struct module *mod,
 		ftrace_record_ip(addr);
 	}
 
-	/* disable interrupts to prevent kstop machine */
-	local_irq_save(flags);
 	ftrace_update_code(mod);
-	local_irq_restore(flags);
 	mutex_unlock(&ftrace_lock);
 
 	return 0;
@@ -2735,10 +3350,11 @@ void ftrace_release_mod(struct module *mod)
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 
+	mutex_lock(&ftrace_lock);
+
 	if (ftrace_disabled)
-		return;
+		goto out_unlock;
 
-	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		if (within_module_core(rec->ip, mod)) {
 			/*
@@ -2749,6 +3365,7 @@ void ftrace_release_mod(struct module *mod)
 			ftrace_free_rec(rec);
 		}
 	} while_for_each_ftrace_rec();
+ out_unlock:
 	mutex_unlock(&ftrace_lock);
 }
 
@@ -2835,6 +3452,10 @@ void __init ftrace_init(void)
 
 #else
 
+static struct ftrace_ops global_ops = {
+	.func			= ftrace_stub,
+};
+
 static int __init ftrace_nodyn_init(void)
 {
 	ftrace_enabled = 1;
@@ -2845,12 +3466,38 @@ device_initcall(ftrace_nodyn_init);
 static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
 /* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(command)	do { } while (0)
-# define ftrace_shutdown(command)	do { } while (0)
+# define ftrace_startup(ops, command)	do { } while (0)
+# define ftrace_shutdown(ops, command)	do { } while (0)
 # define ftrace_startup_sysctl()	do { } while (0)
 # define ftrace_shutdown_sysctl()	do { } while (0)
+
+static inline int
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+{
+	return 1;
+}
+
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+static void
+ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_ops *op;
+
+	/*
+	 * Some of the ops may be dynamically allocated,
+	 * they must be freed after a synchronize_sched().
+	 */
+	preempt_disable_notrace();
+	op = rcu_dereference_raw(ftrace_ops_list);
+	while (op != &ftrace_list_end) {
+		if (ftrace_ops_test(op, ip))
+			op->func(ip, parent_ip);
+		op = rcu_dereference_raw(op->next);
+	};
+	preempt_enable_notrace();
+}
+
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
@@ -3143,19 +3790,23 @@ void ftrace_kill(void)
  */
 int register_ftrace_function(struct ftrace_ops *ops)
 {
-	int ret;
-
-	if (unlikely(ftrace_disabled))
-		return -1;
+	int ret = -1;
 
 	mutex_lock(&ftrace_lock);
 
+	if (unlikely(ftrace_disabled))
+		goto out_unlock;
+
 	ret = __register_ftrace_function(ops);
-	ftrace_startup(0);
+	if (!ret)
+		ftrace_startup(ops, 0);
 
+
+ out_unlock:
 	mutex_unlock(&ftrace_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_ftrace_function);
 
 /**
  * unregister_ftrace_function - unregister a function for profiling.
@@ -3169,25 +3820,27 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 
 	mutex_lock(&ftrace_lock);
 	ret = __unregister_ftrace_function(ops);
-	ftrace_shutdown(0);
+	if (!ret)
+		ftrace_shutdown(ops, 0);
 	mutex_unlock(&ftrace_lock);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(unregister_ftrace_function);
 
 int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp,
 		     loff_t *ppos)
 {
-	int ret;
-
-	if (unlikely(ftrace_disabled))
-		return -ENODEV;
+	int ret = -ENODEV;
 
 	mutex_lock(&ftrace_lock);
 
-	ret  = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (unlikely(ftrace_disabled))
+		goto out;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
 		goto out;
@@ -3199,11 +3852,11 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 		ftrace_startup_sysctl();
 
 		/* we are starting ftrace again */
-		if (ftrace_list != &ftrace_list_end) {
-			if (ftrace_list->next == &ftrace_list_end)
-				ftrace_trace_function = ftrace_list->func;
+		if (ftrace_ops_list != &ftrace_list_end) {
+			if (ftrace_ops_list->next == &ftrace_list_end)
+				ftrace_trace_function = ftrace_ops_list->func;
 			else
-				ftrace_trace_function = ftrace_list_func;
+				ftrace_trace_function = ftrace_ops_list_func;
 		}
 
 	} else {
@@ -3392,7 +4045,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 	ftrace_graph_return = retfunc;
 	ftrace_graph_entry = entryfunc;
 
-	ftrace_startup(FTRACE_START_FUNC_RET);
+	ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
 
 out:
 	mutex_unlock(&ftrace_lock);
@@ -3409,7 +4062,7 @@ void unregister_ftrace_graph(void)
 	ftrace_graph_active--;
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
-	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+	ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
 	unregister_pm_notifier(&ftrace_suspend_notifier);
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d38c16a06a6f..ee9c921d7f21 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1110,6 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->padding			= 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -2013,9 +2014,10 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
 	enum print_line_t ret;
 
-	if (iter->lost_events)
-		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
-				 iter->cpu, iter->lost_events);
+	if (iter->lost_events &&
+	    !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+				 iter->cpu, iter->lost_events))
+		return TRACE_TYPE_PARTIAL_LINE;
 
 	if (iter->trace && iter->trace->print_line) {
 		ret = iter->trace->print_line(iter);
@@ -3229,6 +3231,14 @@ waitagain:
 
 		if (iter->seq.len >= cnt)
 			break;
+
+		/*
+		 * If the full flag is set, the trace_seq buffer filled up and
+		 * we should already have left via the partial output condition
+		 * above. One of the trace_seq_* functions is not used properly.
+		 */
+		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
+			  iter->ent->type);
 	}
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5e9dfc6286dd..6b69c4bd306f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -419,6 +419,8 @@ extern void trace_find_cmdline(int pid, char comm[]);
 extern unsigned long ftrace_update_tot_cnt;
 #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
 extern int DYN_FTRACE_TEST_NAME(void);
+#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
+extern int DYN_FTRACE_TEST_NAME2(void);
 #endif
 
 extern int ring_buffer_expanded;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e88f74fe1d4c..2fe110341359 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,6 +116,7 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
+	__common_field(int, padding);
 
 	return ret;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 16aee4d44e8f..8d0e1cc4e974 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -149,11 +149,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = function_trace_call,
+	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 
 static struct ftrace_ops trace_stack_ops __read_mostly =
 {
 	.func = function_stack_trace_call,
+	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 
 /* Our two options */
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a4969b47afc1..c77424be284d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -153,6 +153,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = irqsoff_tracer_call,
+	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 #endif /* CONFIG_FUNCTION_TRACER */
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 35d55a386145..f925c45f0afa 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -53,7 +53,6 @@ const char *reserved_field_names[] = {
 	"common_preempt_count",
 	"common_pid",
 	"common_tgid",
-	"common_lock_depth",
 	FIELD_STRING_IP,
 	FIELD_STRING_RETIP,
 	FIELD_STRING_FUNC,
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 456be9063c2d..cf535ccedc86 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -830,6 +830,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
 enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
 				  struct trace_event *event)
 {
+	if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type))
+		return TRACE_TYPE_PARTIAL_LINE;
+
 	return TRACE_TYPE_HANDLED;
 }
 
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 2547d8813cf0..dff763b7baf1 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -32,7 +32,7 @@ static DEFINE_MUTEX(btrace_mutex);
 
 struct trace_bprintk_fmt {
 	struct list_head list;
-	char fmt[0];
+	const char *fmt;
 };
 
 static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
@@ -49,6 +49,7 @@ static
 void hold_module_trace_bprintk_format(const char **start, const char **end)
 {
 	const char **iter;
+	char *fmt;
 
 	mutex_lock(&btrace_mutex);
 	for (iter = start; iter < end; iter++) {
@@ -58,14 +59,18 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 			continue;
 		}
 
-		tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
-				+ strlen(*iter) + 1, GFP_KERNEL);
-		if (tb_fmt) {
+		tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
+		if (tb_fmt)
+			fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
+		if (tb_fmt && fmt) {
 			list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
-			strcpy(tb_fmt->fmt, *iter);
+			strcpy(fmt, *iter);
+			tb_fmt->fmt = fmt;
 			*iter = tb_fmt->fmt;
-		} else
+		} else {
+			kfree(tb_fmt);
 			*iter = NULL;
+		}
 	}
 	mutex_unlock(&btrace_mutex);
 }
@@ -84,6 +89,76 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
 	return 0;
 }
 
+/*
+ * The debugfs/tracing/printk_formats file maps the addresses with
+ * the ASCII formats that are used in the bprintk events in the
+ * buffer. For userspace tools to be able to decode the events from
+ * the buffer, they need to be able to map the address with the format.
+ *
+ * The addresses of the bprintk formats are in their own section
+ * __trace_printk_fmt. But for modules we copy them into a linked list.
+ * The code to print the formats and their addresses passes around the
+ * address of the fmt string. If the fmt address passed into the seq
+ * functions is within the kernel core __trace_printk_fmt section, then
+ * it simply uses the next pointer in the list.
+ *
+ * When the fmt pointer is outside the kernel core __trace_printk_fmt
+ * section, then we need to read the linked list pointers. The trick is
+ * we pass the address of the string to the seq function just like
+ * we do for the kernel core formats. To get back the structure that
+ * holds the format, we simply use container_of() and then go to the
+ * next format in the list.
+ */
+static const char **
+find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
+{
+	struct trace_bprintk_fmt *mod_fmt;
+
+	if (list_empty(&trace_bprintk_fmt_list))
+		return NULL;
+
+	/*
+	 * v will point to the address of the fmt record from t_next
+	 * v will be NULL from t_start.
+	 * If this is the first pointer or called from start
+	 * then we need to walk the list.
+	 */
+	if (!v || start_index == *pos) {
+		struct trace_bprintk_fmt *p;
+
+		/* search the module list */
+		list_for_each_entry(p, &trace_bprintk_fmt_list, list) {
+			if (start_index == *pos)
+				return &p->fmt;
+			start_index++;
+		}
+		/* pos > index */
+		return NULL;
+	}
+
+	/*
+	 * v points to the address of the fmt field in the mod list
+	 * structure that holds the module print format.
+	 */
+	mod_fmt = container_of(v, typeof(*mod_fmt), fmt);
+	if (mod_fmt->list.next == &trace_bprintk_fmt_list)
+		return NULL;
+
+	mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list);
+
+	return &mod_fmt->fmt;
+}
+
+static void format_mod_start(void)
+{
+	mutex_lock(&btrace_mutex);
+}
+
+static void format_mod_stop(void)
+{
+	mutex_unlock(&btrace_mutex);
+}
+
 #else /* !CONFIG_MODULES */
 __init static int
 module_trace_bprintk_format_notify(struct notifier_block *self,
@@ -91,6 +166,13 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
 {
 	return 0;
 }
+static inline const char **
+find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
+{
+	return NULL;
+}
+static inline void format_mod_start(void) { }
+static inline void format_mod_stop(void) { }
 #endif /* CONFIG_MODULES */
 
 
@@ -153,20 +235,33 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
 }
 EXPORT_SYMBOL_GPL(__ftrace_vprintk);
 
+static const char **find_next(void *v, loff_t *pos)
+{
+	const char **fmt = v;
+	int start_index;
+
+	if (!fmt)
+		fmt = __start___trace_bprintk_fmt + *pos;
+
+	start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
+
+	if (*pos < start_index)
+		return fmt;
+
+	return find_next_mod_format(start_index, v, fmt, pos);
+}
+
 static void *
 t_start(struct seq_file *m, loff_t *pos)
 {
-	const char **fmt = __start___trace_bprintk_fmt + *pos;
-
-	if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
-		return NULL;
-	return fmt;
+	format_mod_start();
+	return find_next(NULL, pos);
 }
 
 static void *t_next(struct seq_file *m, void * v, loff_t *pos)
 {
 	(*pos)++;
-	return t_start(m, pos);
+	return find_next(v, pos);
 }
 
 static int t_show(struct seq_file *m, void *v)
@@ -205,6 +300,7 @@ static int t_show(struct seq_file *m, void *v)
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	format_mod_stop();
 }
 
 static const struct seq_operations show_format_seq_ops = {
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 7319559ed59f..f029dd4fd2ca 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -129,6 +129,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = wakeup_tracer_call,
+	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 #endif /* CONFIG_FUNCTION_TRACER */
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 659732eba07c..288541f977fb 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -101,6 +101,206 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+static int trace_selftest_test_probe1_cnt;
+static void trace_selftest_test_probe1_func(unsigned long ip,
+					    unsigned long pip)
+{
+	trace_selftest_test_probe1_cnt++;
+}
+
+static int trace_selftest_test_probe2_cnt;
+static void trace_selftest_test_probe2_func(unsigned long ip,
+					    unsigned long pip)
+{
+	trace_selftest_test_probe2_cnt++;
+}
+
+static int trace_selftest_test_probe3_cnt;
+static void trace_selftest_test_probe3_func(unsigned long ip,
+					    unsigned long pip)
+{
+	trace_selftest_test_probe3_cnt++;
+}
+
+static int trace_selftest_test_global_cnt;
+static void trace_selftest_test_global_func(unsigned long ip,
+					    unsigned long pip)
+{
+	trace_selftest_test_global_cnt++;
+}
+
+static int trace_selftest_test_dyn_cnt;
+static void trace_selftest_test_dyn_func(unsigned long ip,
+					 unsigned long pip)
+{
+	trace_selftest_test_dyn_cnt++;
+}
+
+static struct ftrace_ops test_probe1 = {
+	.func			= trace_selftest_test_probe1_func,
+};
+
+static struct ftrace_ops test_probe2 = {
+	.func			= trace_selftest_test_probe2_func,
+};
+
+static struct ftrace_ops test_probe3 = {
+	.func			= trace_selftest_test_probe3_func,
+};
+
+static struct ftrace_ops test_global = {
+	.func			= trace_selftest_test_global_func,
+	.flags			= FTRACE_OPS_FL_GLOBAL,
+};
+
+static void print_counts(void)
+{
+	printk("(%d %d %d %d %d) ",
+	       trace_selftest_test_probe1_cnt,
+	       trace_selftest_test_probe2_cnt,
+	       trace_selftest_test_probe3_cnt,
+	       trace_selftest_test_global_cnt,
+	       trace_selftest_test_dyn_cnt);
+}
+
+static void reset_counts(void)
+{
+	trace_selftest_test_probe1_cnt = 0;
+	trace_selftest_test_probe2_cnt = 0;
+	trace_selftest_test_probe3_cnt = 0;
+	trace_selftest_test_global_cnt = 0;
+	trace_selftest_test_dyn_cnt = 0;
+}
+
+/* Check that per-ops filters route calls to the right probes; 0 on success */
+static int trace_selftest_ops(int cnt)
+{
+	int save_ftrace_enabled = ftrace_enabled;
+	struct ftrace_ops *dyn_ops;
+	char *func1_name, *func2_name;
+	int len1, len2;
+	int ret = -1;
+
+	printk(KERN_CONT "PASSED\n");
+	pr_info("Testing dynamic ftrace ops #%d: ", cnt);
+
+	ftrace_enabled = 1;
+	reset_counts();
+
+	/* Handle PPC64 '.' name */
+	func1_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
+	func2_name = "*" __stringify(DYN_FTRACE_TEST_NAME2);
+	len1 = strlen(func1_name);
+	len2 = strlen(func2_name);
+
+	/*
+	 * Probe 1 will trace function 1.
+	 * Probe 2 will trace function 2.
+	 * Probe 3 will trace functions 1 and 2.
+	 */
+	ftrace_set_filter(&test_probe1, func1_name, len1, 1);
+	ftrace_set_filter(&test_probe2, func2_name, len2, 1);
+	ftrace_set_filter(&test_probe3, func1_name, len1, 1);
+	ftrace_set_filter(&test_probe3, func2_name, len2, 0);
+
+	register_ftrace_function(&test_probe1);
+	register_ftrace_function(&test_probe2);
+	register_ftrace_function(&test_probe3);
+	register_ftrace_function(&test_global);
+
+	DYN_FTRACE_TEST_NAME();
+
+	print_counts();
+
+	if (trace_selftest_test_probe1_cnt != 1)
+		goto out;
+	if (trace_selftest_test_probe2_cnt != 0)
+		goto out;
+	if (trace_selftest_test_probe3_cnt != 1)
+		goto out;
+	if (trace_selftest_test_global_cnt == 0)
+		goto out;
+
+	DYN_FTRACE_TEST_NAME2();
+
+	print_counts();
+
+	if (trace_selftest_test_probe1_cnt != 1)
+		goto out;
+	if (trace_selftest_test_probe2_cnt != 1)
+		goto out;
+	if (trace_selftest_test_probe3_cnt != 2)
+		goto out;
+
+	/* Add a dynamic probe */
+	dyn_ops = kzalloc(sizeof(*dyn_ops), GFP_KERNEL);
+	if (!dyn_ops) {
+		printk("MEMORY ERROR ");
+		goto out;
+	}
+
+	dyn_ops->func = trace_selftest_test_dyn_func;
+
+	/* Once registered, every failure path must leave via out_free */
+	register_ftrace_function(dyn_ops);
+
+	trace_selftest_test_global_cnt = 0;
+
+	DYN_FTRACE_TEST_NAME();
+
+	print_counts();
+
+	if (trace_selftest_test_probe1_cnt != 2)
+		goto out_free;
+	if (trace_selftest_test_probe2_cnt != 1)
+		goto out_free;
+	if (trace_selftest_test_probe3_cnt != 3)
+		goto out_free;
+	if (trace_selftest_test_global_cnt == 0)
+		goto out_free;
+	if (trace_selftest_test_dyn_cnt == 0)
+		goto out_free;
+
+	DYN_FTRACE_TEST_NAME2();
+
+	print_counts();
+
+	if (trace_selftest_test_probe1_cnt != 2)
+		goto out_free;
+	if (trace_selftest_test_probe2_cnt != 2)
+		goto out_free;
+	if (trace_selftest_test_probe3_cnt != 4)
+		goto out_free;
+
+	ret = 0;
+ out_free:
+	unregister_ftrace_function(dyn_ops);
+	kfree(dyn_ops);
+
+ out:
+	/* Purposely unregister in the same order */
+	unregister_ftrace_function(&test_probe1);
+	unregister_ftrace_function(&test_probe2);
+	unregister_ftrace_function(&test_probe3);
+	unregister_ftrace_function(&test_global);
+
+	/* Make sure everything is off: no probe may fire on either function */
+	reset_counts();
+	DYN_FTRACE_TEST_NAME();
+	DYN_FTRACE_TEST_NAME2();
+
+	if (trace_selftest_test_probe1_cnt ||
+	    trace_selftest_test_probe2_cnt ||
+	    trace_selftest_test_probe3_cnt ||
+	    trace_selftest_test_global_cnt ||
+	    trace_selftest_test_dyn_cnt)
+		ret = -1;
+
+	ftrace_enabled = save_ftrace_enabled;
+
+	return ret;
+}
+
 /* Test dynamic code modification and ftrace filters */
 int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 					   struct trace_array *tr,
@@ -131,7 +331,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
 
 	/* filter only on our function */
-	ftrace_set_filter(func_name, strlen(func_name), 1);
+	ftrace_set_global_filter(func_name, strlen(func_name), 1);
 
 	/* enable tracing */
 	ret = tracer_init(trace, tr);
@@ -166,22 +366,30 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 
 	/* check the trace buffer */
 	ret = trace_test_buffer(tr, &count);
-	trace->reset(tr);
 	tracing_start();
 
 	/* we should only have one item */
 	if (!ret && count != 1) {
+		trace->reset(tr);
 		printk(KERN_CONT ".. filter failed count=%ld ..", count);
 		ret = -1;
 		goto out;
 	}
 
+	/* Test the ops with global tracing running */
+	ret = trace_selftest_ops(1);
+	trace->reset(tr);
+
  out:
 	ftrace_enabled = save_ftrace_enabled;
 	tracer_enabled = save_tracer_enabled;
 
 	/* Enable tracing on all functions again */
-	ftrace_set_filter(NULL, 0, 1);
+	ftrace_set_global_filter(NULL, 0, 1);
+
+	/* Test the ops with global tracing off */
+	if (!ret)
+		ret = trace_selftest_ops(2);
 
 	return ret;
 }
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
index 54dd77cce5bf..b4c475a0a48b 100644
--- a/kernel/trace/trace_selftest_dynamic.c
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -5,3 +5,9 @@ int DYN_FTRACE_TEST_NAME(void)
 	/* used to call mcount */
 	return 0;
 }
+
+int DYN_FTRACE_TEST_NAME2(void)
+{
+	/* used to call mcount */
+	return 0;
+}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4c5dead0c239..b0b53b8e4c25 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -133,6 +133,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
 static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = stack_trace_call,
+	.flags = FTRACE_OPS_FL_GLOBAL,
 };
 
 static ssize_t
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 68187af4889e..b219f1449c54 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry,
 {
 	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 
-	if (elem->regfunc && !elem->state && active)
+	if (elem->regfunc && !jump_label_enabled(&elem->key) && active)
 		elem->regfunc();
-	else if (elem->unregfunc && elem->state && !active)
+	else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active)
 		elem->unregfunc();
 
 	/*
@@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry,
 	 * is used.
 	 */
 	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
-	if (!elem->state && active) {
-		jump_label_enable(&elem->state);
-		elem->state = active;
-	} else if (elem->state && !active) {
-		jump_label_disable(&elem->state);
-		elem->state = active;
-	}
+	if (active && !jump_label_enabled(&elem->key))
+		jump_label_inc(&elem->key);
+	else if (!active && jump_label_enabled(&elem->key))
+		jump_label_dec(&elem->key);
 }
 
 /*
@@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry,
  */
 static void disable_tracepoint(struct tracepoint *elem)
 {
-	if (elem->unregfunc && elem->state)
+	if (elem->unregfunc && jump_label_enabled(&elem->key))
 		elem->unregfunc();
 
-	if (elem->state) {
-		jump_label_disable(&elem->state);
-		elem->state = 0;
-	}
+	if (jump_label_enabled(&elem->key))
+		jump_label_dec(&elem->key);
 	rcu_assign_pointer(elem->funcs, NULL);
 }
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 140dce750450..14733d4d156b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -430,9 +430,12 @@ static int watchdog_enable(int cpu)
 		p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
 		if (IS_ERR(p)) {
 			printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
-			if (!err)
+			if (!err) {
 				/* if hardlockup hasn't already set this */
 				err = PTR_ERR(p);
+				/* and disable the perf event */
+				watchdog_nmi_disable(cpu);
+			}
 			goto out;
 		}
 		kthread_bind(p, cpu);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8859a41806dd..e3378e8d3a5c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1291,8 +1291,14 @@ __acquires(&gcwq->lock)
 			return true;
 		spin_unlock_irq(&gcwq->lock);
 
-		/* CPU has come up in between, retry migration */
+		/*
+		 * We've raced with CPU hot[un]plug.  Give it a breather
+		 * and retry migration.  cond_resched() is required here;
+		 * otherwise, we might deadlock against cpu_stop trying to
+		 * bring down the CPU on non-preemptive kernel.
+		 */
 		cpu_relax();
+		cond_resched();
 	}
 }
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c768bcdda1b7..10ef61981149 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -238,6 +238,21 @@ config DETECT_HUNG_TASK
 	  enabled then all held locks will also be reported. This
 	  feature has negligible overhead.
 
+config DEFAULT_HUNG_TASK_TIMEOUT
+	int "Default timeout for hung task detection (in seconds)"
+	depends on DETECT_HUNG_TASK
+	default 120
+	help
+	  This option controls the default timeout (in seconds) used
+	  to determine when a task has become non-responsive and should
+	  be considered hung.
+
+	  It can be adjusted at runtime via the kernel.hung_task_timeout_secs
+	  sysctl or by writing to /proc/sys/kernel/hung_task_timeout_secs.
+
+	  A timeout of 0 disables the check.  The default is two minutes.
+	  Keeping the default should be fine in most cases.
+
 config BOOTPARAM_HUNG_TASK_PANIC
 	bool "Panic (Reboot) On Hung Tasks"
 	depends on DETECT_HUNG_TASK
@@ -337,7 +352,7 @@ config DEBUG_OBJECTS_WORK
 
 config DEBUG_OBJECTS_RCU_HEAD
 	bool "Debug RCU callbacks objects"
-	depends on DEBUG_OBJECTS && PREEMPT
+	depends on DEBUG_OBJECTS
 	help
 	  Enable this to turn on debugging of RCU list heads (call_rcu() usage).
 
@@ -398,9 +413,9 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-		(X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
+		(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
 
-	select DEBUG_FS if SYSFS
+	select DEBUG_FS
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
 	select CRC32
@@ -875,22 +890,9 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
-config RCU_CPU_STALL_DETECTOR
-	bool "Check for stalled CPUs delaying RCU grace periods"
-	depends on TREE_RCU || TREE_PREEMPT_RCU
-	default y
-	help
-	  This option causes RCU to printk information on which
-	  CPUs are delaying the current grace period, but only when
-	  the grace period extends for excessive time periods.
-
-	  Say N if you want to disable such checks.
-
-	  Say Y if you are unsure.
-
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
-	depends on RCU_CPU_STALL_DETECTOR
+	depends on TREE_RCU || TREE_PREEMPT_RCU
 	range 3 300
 	default 60
 	help
@@ -899,22 +901,9 @@ config RCU_CPU_STALL_TIMEOUT
 	  RCU grace period persists, additional CPU stall warnings are
 	  printed at more widely spaced intervals.
 
-config RCU_CPU_STALL_DETECTOR_RUNNABLE
-	bool "RCU CPU stall checking starts automatically at boot"
-	depends on RCU_CPU_STALL_DETECTOR
-	default y
-	help
-	  If set, start checking for RCU CPU stalls immediately on
-	  boot.  Otherwise, RCU CPU stall checking must be manually
-	  enabled.
-
-	  Say Y if you are unsure.
-
-	  Say N if you wish to suppress RCU CPU stall checking during boot.
-
 config RCU_CPU_STALL_VERBOSE
 	bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
-	depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
+	depends on TREE_PREEMPT_RCU
 	default y
 	help
 	  This option causes RCU to printk detailed per-task information
diff --git a/lib/Makefile b/lib/Makefile
index ef0f28571156..4b49a249064b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,8 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o
+	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
+	 bsearch.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 
diff --git a/lib/bsearch.c b/lib/bsearch.c
new file mode 100644
index 000000000000..5b54758e2afb
--- /dev/null
+++ b/lib/bsearch.c
@@ -0,0 +1,53 @@
+/*
+ * A generic implementation of binary search for the Linux kernel
+ *
+ * Copyright (C) 2008-2009 Ksplice, Inc.
+ * Author: Tim Abbott <tabbott@ksplice.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2.
+ */
+
+#include <linux/module.h>
+#include <linux/bsearch.h>
+
+/**
+ * bsearch - binary search an array of elements
+ * @key: pointer to item being searched for
+ * @base: pointer to first element to search
+ * @num: number of elements
+ * @size: size of each element
+ * @cmp: pointer to comparison function
+ *
+ * Binary search @base, which must already be sorted in ascending order
+ * under @cmp.  Returns a pointer to a matching element, or NULL if no
+ * element compares equal to @key.
+ *
+ * Note that the key need not have the same type as the elements in
+ * the array, e.g. key could be a string and the comparison function
+ * could compare the string with the struct's name field.  However, if
+ * the key and elements in the array are of the same type, you can use
+ * the same comparison function for both sort() and bsearch().
+ */
+void *bsearch(const void *key, const void *base, size_t num, size_t size,
+	      int (*cmp)(const void *key, const void *elt))
+{
+	size_t start = 0, end = num;
+	int result;
+
+	while (start < end) {
+		size_t mid = start + (end - start) / 2;
+
+		result = cmp(key, base + mid * size);
+		if (result < 0)
+			end = mid;
+		else if (result > 0)
+			start = mid + 1;
+		else
+			return (void *)base + mid * size;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(bsearch);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 4bfb0471f106..db07bfd9298e 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -649,7 +649,7 @@ out_err:
 	return -ENOMEM;
 }
 
-static int device_dma_allocations(struct device *dev)
+static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
 {
 	struct dma_debug_entry *entry;
 	unsigned long flags;
@@ -660,8 +660,10 @@ static int device_dma_allocations(struct device *dev)
 	for (i = 0; i < HASH_SIZE; ++i) {
 		spin_lock(&dma_entry_hash[i].lock);
 		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
-			if (entry->dev == dev)
+			if (entry->dev == dev) {
 				count += 1;
+				*out_entry = entry;
+			}
 		}
 		spin_unlock(&dma_entry_hash[i].lock);
 	}
@@ -674,6 +676,7 @@ static int device_dma_allocations(struct device *dev)
 static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
 {
 	struct device *dev = data;
+	struct dma_debug_entry *uninitialized_var(entry);
 	int count;
 
 	if (global_disable)
@@ -681,12 +684,17 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti
 
 	switch (action) {
 	case BUS_NOTIFY_UNBOUND_DRIVER:
-		count = device_dma_allocations(dev);
+		count = device_dma_allocations(dev, &entry);
 		if (count == 0)
 			break;
-		err_printk(dev, NULL, "DMA-API: device driver has pending "
+		err_printk(dev, entry, "DMA-API: device driver has pending "
 				"DMA allocations while released from device "
-				"[count=%d]\n", count);
+				"[count=%d]\n"
+				"One of leaked entries details: "
+				"[device address=0x%016llx] [size=%llu bytes] "
+				"[mapped with %s] [mapped as %s]\n",
+			count, entry->dev_addr, entry->size,
+			dir2name[entry->direction], type2name[entry->type]);
 		break;
 	default:
 		break;
diff --git a/lib/flex_array.c b/lib/flex_array.c
index c0ea40ba2082..854b57bd7d9d 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear);
 
 /**
  * flex_array_prealloc - guarantee that array space exists
- * @fa:		the flex array for which to preallocate parts
- * @start:	index of first array element for which space is allocated
- * @end:	index of last (inclusive) element for which space is allocated
- * @flags:	page allocation flags
+ * @fa:			the flex array for which to preallocate parts
+ * @start:		index of first array element for which space is allocated
+ * @nr_elements:	number of elements for which space is allocated
+ * @flags:		page allocation flags
  *
  * This will guarantee that no future calls to flex_array_put()
  * will allocate memory.  It can be used if you are expecting to
@@ -245,14 +245,24 @@ EXPORT_SYMBOL(flex_array_clear);
  * Locking must be provided by the caller.
  */
 int flex_array_prealloc(struct flex_array *fa, unsigned int start,
-			unsigned int end, gfp_t flags)
+			unsigned int nr_elements, gfp_t flags)
 {
 	int start_part;
 	int end_part;
 	int part_nr;
+	unsigned int end;
 	struct flex_array_part *part;
 
-	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+	if (!start && !nr_elements)
+		return 0;
+	if (start >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (!nr_elements)
+		return 0;
+
+	end = start + nr_elements - 1;
+
+	if (end >= fa->total_nr_elements)
 		return -ENOSPC;
 	if (elements_fit_in_base(fa))
 		return 0;
@@ -343,6 +353,8 @@ int flex_array_shrink(struct flex_array *fa)
 	int part_nr;
 	int ret = 0;
 
+	if (!fa->total_nr_elements)
+		return 0;
 	if (elements_fit_in_base(fa))
 		return ret;
 	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 05672e819f8c..a235f3cc471c 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -49,12 +49,9 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 			val = *s - '0';
 		else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
 			val = _tolower(*s) - 'a' + 10;
-		else if (*s == '\n') {
-			if (*(s + 1) == '\0')
-				break;
-			else
-				return -EINVAL;
-		} else
+		else if (*s == '\n' && *(s + 1) == '\0')
+			break;
+		else
 			return -EINVAL;
 
 		if (val >= base)
diff --git a/lib/string.c b/lib/string.c
index f71bead1be3e..01fad9b203e1 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -535,6 +535,35 @@ bool sysfs_streq(const char *s1, const char *s2)
 }
 EXPORT_SYMBOL(sysfs_streq);
 
+/**
+ * strtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
+ * Otherwise it will return -EINVAL.  Value pointed to by res is
+ * updated upon finding a match.
+ */
+int strtobool(const char *s, bool *res)
+{
+	switch (s[0]) {
+	case 'y':
+	case 'Y':
+	case '1':
+		*res = true;
+		break;
+	case 'n':
+	case 'N':
+	case '0':
+		*res = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(strtobool);
+
 #ifndef __HAVE_ARCH_MEMSET
 /**
  * memset - Fill a region of memory with the given value
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index 325c2f9ecebd..d55769d63cb8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -315,12 +315,12 @@ static void __init test_kstrtou64_ok(void)
 		{"65537",	10,	65537},
 		{"2147483646",	10,	2147483646},
 		{"2147483647",	10,	2147483647},
-		{"2147483648",	10,	2147483648},
-		{"2147483649",	10,	2147483649},
-		{"4294967294",	10,	4294967294},
-		{"4294967295",	10,	4294967295},
-		{"4294967296",	10,	4294967296},
-		{"4294967297",	10,	4294967297},
+		{"2147483648",	10,	2147483648ULL},
+		{"2147483649",	10,	2147483649ULL},
+		{"4294967294",	10,	4294967294ULL},
+		{"4294967295",	10,	4294967295ULL},
+		{"4294967296",	10,	4294967296ULL},
+		{"4294967297",	10,	4294967297ULL},
 		{"9223372036854775806",	10,	9223372036854775806ULL},
 		{"9223372036854775807",	10,	9223372036854775807ULL},
 		{"9223372036854775808",	10,	9223372036854775808ULL},
@@ -369,12 +369,12 @@ static void __init test_kstrtos64_ok(void)
 		{"65537",	10,	65537},
 		{"2147483646",	10,	2147483646},
 		{"2147483647",	10,	2147483647},
-		{"2147483648",	10,	2147483648},
-		{"2147483649",	10,	2147483649},
-		{"4294967294",	10,	4294967294},
-		{"4294967295",	10,	4294967295},
-		{"4294967296",	10,	4294967296},
-		{"4294967297",	10,	4294967297},
+		{"2147483648",	10,	2147483648LL},
+		{"2147483649",	10,	2147483649LL},
+		{"4294967294",	10,	4294967294LL},
+		{"4294967295",	10,	4294967295LL},
+		{"4294967296",	10,	4294967296LL},
+		{"4294967297",	10,	4294967297LL},
 		{"9223372036854775806",	10,	9223372036854775806LL},
 		{"9223372036854775807",	10,	9223372036854775807LL},
 	};
@@ -418,10 +418,10 @@ static void __init test_kstrtou32_ok(void)
 		{"65537",	10,	65537},
 		{"2147483646",	10,	2147483646},
 		{"2147483647",	10,	2147483647},
-		{"2147483648",	10,	2147483648},
-		{"2147483649",	10,	2147483649},
-		{"4294967294",	10,	4294967294},
-		{"4294967295",	10,	4294967295},
+		{"2147483648",	10,	2147483648U},
+		{"2147483649",	10,	2147483649U},
+		{"4294967294",	10,	4294967294U},
+		{"4294967295",	10,	4294967295U},
 	};
 	TEST_OK(kstrtou32, u32, "%u", test_u32_ok);
 }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index bc0ac6b333dc..dfd60192bc2e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -797,7 +797,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
-int kptr_restrict = 1;
+int kptr_restrict __read_mostly;
 
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index ea5fa4fe9d67..a6cdc969ea42 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -969,6 +969,9 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 			 */
 			tmp = b->in[b->in_pos++];
 
+			if (tmp == 0x00)
+				return XZ_STREAM_END;
+
 			if (tmp >= 0xE0 || tmp == 0x01) {
 				s->lzma2.need_props = true;
 				s->lzma2.need_dict_reset = false;
@@ -1001,9 +1004,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 						lzma_reset(s);
 				}
 			} else {
-				if (tmp == 0x00)
-					return XZ_STREAM_END;
-
 				if (tmp > 0x02)
 					return XZ_DATA_ERROR;
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a619e0e2e0b..83326ad66d9b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj,
 				struct kobj_attribute *attr, char *buf,
 				enum transparent_hugepage_flag flag)
 {
-	if (test_bit(flag, &transparent_hugepage_flags))
-		return sprintf(buf, "[yes] no\n");
-	else
-		return sprintf(buf, "yes [no]\n");
+	return sprintf(buf, "%d\n",
+		       !!test_bit(flag, &transparent_hugepage_flags));
 }
+
 static ssize_t single_flag_store(struct kobject *kobj,
 				 struct kobj_attribute *attr,
 				 const char *buf, size_t count,
 				 enum transparent_hugepage_flag flag)
 {
-	if (!memcmp("yes", buf,
-		    min(sizeof("yes")-1, count))) {
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &value);
+	if (ret < 0)
+		return ret;
+	if (value > 1)
+		return -EINVAL;
+
+	if (value)
 		set_bit(flag, &transparent_hugepage_flags);
-	} else if (!memcmp("no", buf,
-			   min(sizeof("no")-1, count))) {
+	else
 		clear_bit(flag, &transparent_hugepage_flags);
-	} else
-		return -EINVAL;
 
 	return count;
 }
@@ -680,8 +684,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			return VM_FAULT_OOM;
 		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
 					  vma, haddr, numa_node_id(), 0);
-		if (unlikely(!page))
+		if (unlikely(!page)) {
+			count_vm_event(THP_FAULT_FALLBACK);
 			goto out;
+		}
+		count_vm_event(THP_FAULT_ALLOC);
 		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
 			put_page(page);
 			goto out;
@@ -909,11 +916,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		new_page = NULL;
 
 	if (unlikely(!new_page)) {
+		count_vm_event(THP_FAULT_FALLBACK);
 		ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
 						   pmd, orig_pmd, page, haddr);
 		put_page(page);
 		goto out;
 	}
+	count_vm_event(THP_FAULT_ALLOC);
 
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		put_page(new_page);
@@ -1390,6 +1399,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(!PageSwapBacked(page));
 	__split_huge_page(page, anon_vma);
+	count_vm_event(THP_SPLIT);
 
 	BUG_ON(PageCompound(page));
 out_unlock:
@@ -1398,6 +1408,9 @@ out:
 	return ret;
 }
 
+#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
+		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+
 int hugepage_madvise(struct vm_area_struct *vma,
 		     unsigned long *vm_flags, int advice)
 {
@@ -1406,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_HUGEPAGE |
-				 VM_SHARED   | VM_MAYSHARE   |
-				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
@@ -1426,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_NOHUGEPAGE |
-				 VM_SHARED   | VM_MAYSHARE   |
-				 VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-				 VM_MIXEDMAP | VM_SAO))
+		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
 			return -EINVAL;
 		*vm_flags &= ~VM_HUGEPAGE;
 		*vm_flags |= VM_NOHUGEPAGE;
@@ -1564,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_file || vma->vm_ops)
+	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1784,9 +1793,11 @@ static void collapse_huge_page(struct mm_struct *mm,
 				      node, __GFP_OTHER_NODE);
 	if (unlikely(!new_page)) {
 		up_read(&mm->mmap_sem);
+		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
 		return;
 	}
+	count_vm_event(THP_COLLAPSE_ALLOC);
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		up_read(&mm->mmap_sem);
 		put_page(new_page);
@@ -1816,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	    (vma->vm_flags & VM_NOHUGEPAGE))
 		goto out;
 
-	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+	if (!vma->anon_vma || vma->vm_ops)
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+	/*
+	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
+	 * true too, verify it here.
+	 */
+	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2054,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			progress++;
 			continue;
 		}
-		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+		if (!vma->anon_vma || vma->vm_ops)
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-
-		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+		/*
+		 * If is_pfn_mapping() is true is_linear_pfn_mapping()
+		 * must be true too, verify it here.
+		 */
+		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
+			  vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
@@ -2151,8 +2168,11 @@ static void khugepaged_do_scan(struct page **hpage)
 #ifndef CONFIG_NUMA
 		if (!*hpage) {
 			*hpage = alloc_hugepage(khugepaged_defrag());
-			if (unlikely(!*hpage))
+			if (unlikely(!*hpage)) {
+				count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 				break;
+			}
+			count_vm_event(THP_COLLAPSE_ALLOC);
 		}
 #else
 		if (IS_ERR(*hpage))
@@ -2192,8 +2212,11 @@ static struct page *khugepaged_alloc_hugepage(void)
 
 	do {
 		hpage = alloc_hugepage(khugepaged_defrag());
-		if (!hpage)
+		if (!hpage) {
+			count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 			khugepaged_alloc_sleep();
+		} else
+			count_vm_event(THP_COLLAPSE_ALLOC);
 	} while (unlikely(!hpage) &&
 		 likely(khugepaged_enabled()));
 	return hpage;
@@ -2210,8 +2233,11 @@ static void khugepaged_loop(void)
 	while (likely(khugepaged_enabled())) {
 #ifndef CONFIG_NUMA
 		hpage = khugepaged_alloc_hugepage();
-		if (unlikely(!hpage))
+		if (unlikely(!hpage)) {
+			count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 			break;
+		}
+		count_vm_event(THP_COLLAPSE_ALLOC);
 #else
 		if (IS_ERR(hpage)) {
 			khugepaged_alloc_sleep();
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index c1d5867543e4..aacee45616fc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1414,9 +1414,12 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++(*pos);
 
 	list_for_each_continue_rcu(n, &object_list) {
-		next_obj = list_entry(n, struct kmemleak_object, object_list);
-		if (get_object(next_obj))
+		struct kmemleak_object *obj =
+			list_entry(n, struct kmemleak_object, object_list);
+		if (get_object(obj)) {
+			next_obj = obj;
 			break;
+		}
 	}
 
 	put_object(prev_obj);
diff --git a/mm/memory.c b/mm/memory.c
index 9da8cab1b1b0..61e66f026563 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1359,7 +1359,7 @@ split_fallthrough:
 		 */
 		mark_page_accessed(page);
 	}
-	if (flags & FOLL_MLOCK) {
+	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
 		/*
 		 * The preliminary mapping check is mainly to avoid the
 		 * pointless overhead of lock_page on the ZERO_PAGE
@@ -1410,6 +1410,12 @@ no_page_table:
 	return page;
 }
 
+static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	return stack_guard_page_start(vma, addr) ||
+	       stack_guard_page_end(vma, addr+PAGE_SIZE);
+}
+
 /**
  * __get_user_pages() - pin user pages in memory
  * @tsk:	task_struct of target task
@@ -1488,7 +1494,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		vma = find_extend_vma(mm, start);
 		if (!vma && in_gate_area(mm, start)) {
 			unsigned long pg = start & PAGE_MASK;
-			struct vm_area_struct *gate_vma = get_gate_vma(mm);
 			pgd_t *pgd;
 			pud_t *pud;
 			pmd_t *pmd;
@@ -1513,10 +1518,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				pte_unmap(pte);
 				return i ? : -EFAULT;
 			}
+			vma = get_gate_vma(mm);
 			if (pages) {
 				struct page *page;
 
-				page = vm_normal_page(gate_vma, start, *pte);
+				page = vm_normal_page(vma, start, *pte);
 				if (!page) {
 					if (!(gup_flags & FOLL_DUMP) &&
 					     is_zero_pfn(pte_pfn(*pte)))
@@ -1530,12 +1536,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				get_page(page);
 			}
 			pte_unmap(pte);
-			if (vmas)
-				vmas[i] = gate_vma;
-			i++;
-			start += PAGE_SIZE;
-			nr_pages--;
-			continue;
+			goto next_page;
 		}
 
 		if (!vma ||
@@ -1565,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				int ret;
 				unsigned int fault_flags = 0;
 
+				/* For mlock, just skip the stack guard page. */
+				if (foll_flags & FOLL_MLOCK) {
+					if (stack_guard_page(vma, start))
+						goto next_page;
+				}
 				if (foll_flags & FOLL_WRITE)
 					fault_flags |= FAULT_FLAG_WRITE;
 				if (nonblocking)
@@ -1631,6 +1637,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				flush_anon_page(vma, page, start);
 				flush_dcache_page(page);
 			}
+next_page:
 			if (vmas)
 				vmas[i] = vma;
 			i++;
@@ -3386,7 +3393,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+	if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
 		return VM_FAULT_OOM;
 	/* if an huge pmd materialized from under us just retry later */
 	if (unlikely(pmd_trans_huge(*pmd)))
@@ -3678,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 			 */
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 			vma = find_vma(mm, addr);
-			if (!vma)
+			if (!vma || vma->vm_start > addr)
 				break;
 			if (vma->vm_ops && vma->vm_ops->access)
 				ret = vma->vm_ops->access(vma, addr, buf,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a2acaf820fe5..9ca1d604f7cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -375,7 +375,7 @@ void online_page(struct page *page)
 #endif
 
 #ifdef CONFIG_FLATMEM
-	max_mapnr = max(page_to_pfn(page), max_mapnr);
+	max_mapnr = max(pfn, max_mapnr);
 #endif
 
 	ClearPageReserved(page);
diff --git a/mm/mlock.c b/mm/mlock.c
index 2689a08c79af..516b2c2ddd5a 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
 	}
 }
 
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_stack_continue(vma->vm_prev, addr);
-}
-
 /**
  * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
  * @vma:   target vma
@@ -169,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end   > vma->vm_end);
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	gup_flags = FOLL_TOUCH;
+	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
 	/*
 	 * We want to touch writable mappings with a write fault in order
 	 * to break COW, except for shared mappings because these don't COW
@@ -185,15 +178,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
 		gup_flags |= FOLL_FORCE;
 
-	if (vma->vm_flags & VM_LOCKED)
-		gup_flags |= FOLL_MLOCK;
-
-	/* We don't try to access the guard page of a stack vma */
-	if (stack_guard_page(vma, start)) {
-		addr += PAGE_SIZE;
-		nr_pages--;
-	}
-
 	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
 				NULL, NULL, nonblocking);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ec8eb5a9cdd..772140c53ab1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	 * randomize_va_space to 2, which will still cause mm->start_brk
 	 * to be arbitrarily shifted
 	 */
-	if (mm->start_brk > PAGE_ALIGN(mm->end_data))
+	if (current->brk_randomized)
 		min_brk = mm->start_brk;
 	else
 		min_brk = mm->end_data;
@@ -1767,10 +1767,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		size = address - vma->vm_start;
 		grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
-		error = acct_stack_growth(vma, size, grow);
-		if (!error) {
-			vma->vm_end = address;
-			perf_event_mmap(vma);
+		error = -ENOMEM;
+		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+			error = acct_stack_growth(vma, size, grow);
+			if (!error) {
+				vma->vm_end = address;
+				perf_event_mmap(vma);
+			}
 		}
 	}
 	vma_unlock_anon_vma(vma);
@@ -1814,11 +1817,14 @@ static int expand_downwards(struct vm_area_struct *vma,
 		size = vma->vm_end - address;
 		grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
-		error = acct_stack_growth(vma, size, grow);
-		if (!error) {
-			vma->vm_start = address;
-			vma->vm_pgoff -= grow;
-			perf_event_mmap(vma);
+		error = -ENOMEM;
+		if (grow <= vma->vm_pgoff) {
+			error = acct_stack_growth(vma, size, grow);
+			if (!error) {
+				vma->vm_start = address;
+				vma->vm_pgoff -= grow;
+				perf_event_mmap(vma);
+			}
 		}
 	}
 	vma_unlock_anon_vma(vma);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6a819d1b2c7d..f52e85c80e8d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -84,24 +84,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 #endif /* CONFIG_NUMA */
 
 /*
- * If this is a system OOM (not a memcg OOM) and the task selected to be
- * killed is not already running at high (RT) priorities, speed up the
- * recovery by boosting the dying task to the lowest FIFO priority.
- * That helps with the recovery and avoids interfering with RT tasks.
- */
-static void boost_dying_task_prio(struct task_struct *p,
-				  struct mem_cgroup *mem)
-{
-	struct sched_param param = { .sched_priority = 1 };
-
-	if (mem)
-		return;
-
-	if (!rt_task(p))
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-}
-
-/*
  * The process p may have detached its own ->mm while exiting or through
  * use_mm(), but one or more of its subthreads may still have a valid
  * pointer.  Return p, or any of its subthreads with a valid ->mm, with
@@ -190,10 +172,13 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 
 	/*
 	 * The baseline for the badness score is the proportion of RAM that each
-	 * task's rss and swap space use.
+	 * task's rss, pagetable and swap space use.
 	 */
-	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
-			totalpages;
+	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
+	points += get_mm_counter(p->mm, MM_SWAPENTS);
+
+	points *= 1000;
+	points /= totalpages;
 	task_unlock(p);
 
 	/*
@@ -452,13 +437,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 	force_sig(SIGKILL, p);
 
-	/*
-	 * We give our sacrificial lamb high priority and access to
-	 * all the memory it needs. That way it should be able to
-	 * exit() and clear out its resources quickly...
-	 */
-	boost_dying_task_prio(p, mem);
-
 	return 0;
 }
 #undef K
@@ -482,7 +460,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 */
 	if (p->flags & PF_EXITING) {
 		set_tsk_thread_flag(p, TIF_MEMDIE);
-		boost_dying_task_prio(p, mem);
 		return 0;
 	}
 
@@ -556,7 +533,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 	 */
 	if (fatal_signal_pending(current)) {
 		set_thread_flag(TIF_MEMDIE);
-		boost_dying_task_prio(current, NULL);
 		return;
 	}
 
@@ -712,7 +688,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 */
 	if (fatal_signal_pending(current)) {
 		set_thread_flag(TIF_MEMDIE);
-		boost_dying_task_prio(current, NULL);
 		return;
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2747f5e5abc1..d49df7840541 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -54,6 +54,7 @@
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -2317,6 +2318,21 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+{
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page((void *)addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+	return (void *)addr;
+}
+
 /**
  * alloc_pages_exact - allocate an exact number physically-contiguous pages.
  * @size: the number of bytes to allocate
@@ -2336,22 +2352,33 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	unsigned long addr;
 
 	addr = __get_free_pages(gfp_mask, order);
-	if (addr) {
-		unsigned long alloc_end = addr + (PAGE_SIZE << order);
-		unsigned long used = addr + PAGE_ALIGN(size);
-
-		split_page(virt_to_page((void *)addr), order);
-		while (used < alloc_end) {
-			free_page(used);
-			used += PAGE_SIZE;
-		}
-	}
-
-	return (void *)addr;
+	return make_alloc_exact(addr, order, size);
 }
 EXPORT_SYMBOL(alloc_pages_exact);
 
 /**
+ * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
+ *			   pages on a node.
+ * @nid: the preferred node ID where memory should be allocated
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * Like alloc_pages_exact(), but try to allocate on node nid first before falling
+ * back.
+ * Note this is not alloc_pages_exact_node() which allocates on a specific node,
+ * but is not exact.
+ */
+void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+{
+	unsigned order = get_order(size);
+	struct page *p = alloc_pages_node(nid, gfp_mask, order);
+	if (!p)
+		return NULL;
+	return make_alloc_exact((unsigned long)page_address(p), order, size);
+}
+EXPORT_SYMBOL(alloc_pages_exact_nid);
+
+/**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
  * @virt: the value returned by alloc_pages_exact.
  * @size: size of allocation, same value as passed to alloc_pages_exact().
@@ -3176,7 +3203,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void build_all_zonelists(void *data)
+void __ref build_all_zonelists(void *data)
 {
 	set_zonelist_order();
 
@@ -3564,7 +3591,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node(pgdat, alloc_size);
+			alloc_bootmem_node_nopanic(pgdat, alloc_size);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4141,7 +4168,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+								   usemapsize);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat,
@@ -4307,7 +4335,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size =  (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node(pgdat, size);
+			map = alloc_bootmem_node_nopanic(pgdat, size);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 99055010cece..2daadc322ba6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -134,7 +134,7 @@ static void *__init_refok alloc_page_cgroup(size_t size, int nid)
 {
 	void *addr = NULL;
 
-	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
+	addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
 	if (addr)
 		return addr;
 
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
index 603ae98d9694..799dcfd7cd8c 100644
--- a/mm/prio_tree.c
+++ b/mm/prio_tree.c
@@ -13,6 +13,7 @@
 
 #include <linux/mm.h>
 #include <linux/prio_tree.h>
+#include <linux/prefetch.h>
 
 /*
  * See lib/prio_tree.c for details on the general radix priority search tree
diff --git a/mm/shmem.c b/mm/shmem.c
index 58da7c150ba6..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -421,7 +421,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 		 * a waste to allocate index if we cannot allocate data.
 		 */
 		if (sbinfo->max_blocks) {
-			if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
+			if (percpu_counter_compare(&sbinfo->used_blocks,
+						sbinfo->max_blocks - 1) >= 0)
 				return ERR_PTR(-ENOSPC);
 			percpu_counter_inc(&sbinfo->used_blocks);
 			spin_lock(&inode->i_lock);
@@ -851,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-	struct inode *inode;
+	struct address_space *mapping;
 	unsigned long idx;
 	unsigned long size;
 	unsigned long limit;
@@ -874,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0)
+	if (offset >= 0) {
+		shmem_swp_balance_unmap();
 		goto found;
+	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -913,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
+			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -927,8 +930,7 @@ lost2:
 	return 0;
 found:
 	idx += offset;
-	inode = igrab(&info->vfs_inode);
-	spin_unlock(&info->lock);
+	ptr += offset;
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -939,37 +941,18 @@ found:
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
-	mutex_unlock(&shmem_swaplist_mutex);
 
-	error = 1;
-	if (!inode)
-		goto out;
 	/*
-	 * Charge page using GFP_KERNEL while we can wait.
-	 * Charged back to the user(not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
+	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-	if (error)
-		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
-		goto out;
-	}
-	error = 1;
-
-	spin_lock(&info->lock);
-	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache_locked(page, inode->i_mapping,
-						idx, GFP_NOWAIT);
-		/* does mem_cgroup_uncharge_cache_page on error */
-	} else	/* we must compensate for our precharge above */
-		mem_cgroup_uncharge_cache_page(page);
+	mapping = info->vfs_inode.i_mapping;
+	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		struct page *filepage = find_get_page(mapping, idx);
 		error = 1;
 		if (filepage) {
 			/*
@@ -989,14 +972,8 @@ found:
 		swap_free(entry);
 		error = 1;	/* not an error, but entry was found */
 	}
-	if (ptr)
-		shmem_swp_unmap(ptr);
+	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
-	radix_tree_preload_end();
-out:
-	unlock_page(page);
-	page_cache_release(page);
-	iput(inode);		/* allows for NULL */
 	return error;
 }
 
@@ -1008,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	struct list_head *p, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
+	int error;
+
+	/*
+	 * Charge page using GFP_KERNEL while we can wait, before taking
+	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
+	 * Charged back to the user (not to caller) when swap account is used.
+	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+	if (error)
+		goto out;
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu,
+	 * it's okay if sometimes we get rescheduled after this.
+	 */
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
+	radix_tree_preload_end();
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1015,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 		found = shmem_unuse_inode(info, entry, page);
 		cond_resched();
 		if (found)
-			goto out;
+			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-	/*
-	 * Can some race bring us here?  We've been holding page lock,
-	 * so I think not; but would rather try again later than BUG()
-	 */
+
+uncharge:
+	if (!found)
+		mem_cgroup_uncharge_cache_page(page);
+	if (found < 0)
+		error = found;
+out:
 	unlock_page(page);
 	page_cache_release(page);
-out:
-	return (found < 0) ? found : 0;
+	return error;
 }
 
 /*
@@ -1063,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	else
 		swap.val = 0;
 
+	/*
+	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
+	 * if it's not already there.  Do it now because we cannot take
+	 * mutex while holding spinlock, and must do so before the page
+	 * is moved to swap cache, when its pagelock no longer protects
+	 * the inode from eviction.  But don't unlock the mutex until
+	 * we've taken the spinlock, because shmem_unuse_inode() will
+	 * prune a !swapped inode from the swaplist under both locks.
+	 */
+	if (swap.val) {
+		mutex_lock(&shmem_swaplist_mutex);
+		if (list_empty(&info->swaplist))
+			list_add_tail(&info->swaplist, &shmem_swaplist);
+	}
+
 	spin_lock(&info->lock);
+	if (swap.val)
+		mutex_unlock(&shmem_swaplist_mutex);
+
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1083,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
-		if (list_empty(&info->swaplist))
-			inode = igrab(inode);
-		else
-			inode = NULL;
 		spin_unlock(&info->lock);
 		swap_shmem_alloc(swap);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
-		if (inode) {
-			mutex_lock(&shmem_swaplist_mutex);
-			/* move instead of add in case we're racing */
-			list_move_tail(&info->swaplist, &shmem_swaplist);
-			mutex_unlock(&shmem_swaplist_mutex);
-			iput(inode);
-		}
 		return 0;
 	}
 
@@ -1397,21 +1403,16 @@ repeat:
 		shmem_swp_unmap(entry);
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
-			if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
-			    shmem_acct_block(info->flags)) {
-				spin_unlock(&info->lock);
-				error = -ENOSPC;
-				goto failed;
-			}
+			if (percpu_counter_compare(&sbinfo->used_blocks,
+						sbinfo->max_blocks) >= 0 ||
+			    shmem_acct_block(info->flags))
+				goto nospace;
 			percpu_counter_inc(&sbinfo->used_blocks);
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&inode->i_lock);
-		} else if (shmem_acct_block(info->flags)) {
-			spin_unlock(&info->lock);
-			error = -ENOSPC;
-			goto failed;
-		}
+		} else if (shmem_acct_block(info->flags))
+			goto nospace;
 
 		if (!filepage) {
 			int ret;
@@ -1491,6 +1492,24 @@ done:
 	error = 0;
 	goto out;
 
+nospace:
+	/*
+	 * Perhaps the page was brought in from swap between find_lock_page
+	 * and taking info->lock?  We allow for that at add_to_page_cache_lru,
+	 * but must also avoid reporting a spurious ENOSPC while working on a
+	 * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
+	 * is already in page cache, which prevents this race from occurring.)
+	 */
+	if (!filepage) {
+		struct page *page = find_get_page(mapping, idx);
+		if (page) {
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	spin_unlock(&info->lock);
+	error = -ENOSPC;
 failed:
 	if (*pagep != filepage) {
 		unlock_page(filepage);
diff --git a/mm/slab.c b/mm/slab.c
index 46a9c163a92f..bcfa4987c8ae 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
 #include	<linux/debugobjects.h>
 #include	<linux/kmemcheck.h>
 #include	<linux/memory.h>
+#include	<linux/prefetch.h>
 
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
diff --git a/mm/slub.c b/mm/slub.c
index 94d2a33a866e..9d2e5e46bf09 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1940,7 +1940,7 @@ redo:
 		 * Since this is without lock semantics the protection is only against
 		 * code executing on this cpu *not* from access by other cpus.
 		 */
-		if (unlikely(!this_cpu_cmpxchg_double(
+		if (unlikely(!irqsafe_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				object, tid,
 				get_freepointer(s, object), next_tid(tid)))) {
@@ -2145,7 +2145,7 @@ redo:
 		set_freepointer(s, object, c->freelist);
 
 #ifdef CONFIG_CMPXCHG_LOCAL
-		if (unlikely(!this_cpu_cmpxchg_double(
+		if (unlikely(!irqsafe_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				c->freelist, tid,
 				object, next_tid(tid)))) {
diff --git a/mm/swap.c b/mm/swap.c
index a448db377cb0..5602f1a1b1e7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -396,6 +396,9 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 	if (!PageLRU(page))
 		return;
 
+	if (PageUnevictable(page))
+		return;
+
 	/* Some processes are using the page */
 	if (page_mapped(page))
 		return;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c7f5a6d4b75b..c9177202c8ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,8 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/oom.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -936,7 +938,7 @@ keep_lumpy:
 	 * back off and wait for congestion to clear because further reclaim
 	 * will encounter the same problem
 	 */
-	if (nr_dirty == nr_congested && nr_dirty != 0)
+	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
 		zone_set_flag(zone, ZONE_CONGESTED);
 
 	free_page_list(&free_pages);
@@ -1988,17 +1990,12 @@ static bool zone_reclaimable(struct zone *zone)
 	return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
 }
 
-/*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. It can't handle OOM during hibernation.
- * So let's check zone's unreclaimable in direct reclaim as well as kswapd.
- */
+/* All zones in zonelist are unreclaimable? */
 static bool all_unreclaimable(struct zonelist *zonelist,
 		struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
-	bool all_unreclaimable = true;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 			gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2006,13 +2003,11 @@ static bool all_unreclaimable(struct zonelist *zonelist,
 			continue;
 		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
-		if (zone_reclaimable(zone)) {
-			all_unreclaimable = false;
-			break;
-		}
+		if (!zone->all_unreclaimable)
+			return false;
 	}
 
-	return all_unreclaimable;
+	return true;
 }
 
 /*
@@ -2108,6 +2103,14 @@ out:
 	if (sc->nr_reclaimed)
 		return sc->nr_reclaimed;
 
+	/*
+	 * While hibernation is in progress, kswapd is frozen and therefore
+	 * cannot mark zones as all_unreclaimable, so bypass the
+	 * all_unreclaimable check.
+	 */
+	if (oom_killer_disabled)
+		return 0;
+
 	/* top priority shrink_zones still had more to do? don't OOM, then */
 	if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
 		return 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 772b39b87d95..897ea9e88238 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,9 +321,12 @@ static inline void mod_state(struct zone *zone,
 		/*
 		 * The fetching of the stat_threshold is racy. We may apply
 		 * a counter threshold to the wrong the cpu if we get
-		 * rescheduled while executing here. However, the following
-		 * will apply the threshold again and therefore bring the
-		 * counter under the threshold.
+		 * rescheduled while executing here. However, the next
+		 * counter update will apply the threshold again and
+		 * therefore bring the counter under the threshold again.
+		 *
+		 * Most of the time the thresholds are the same anyways
+		 * for all cpus in a zone.
 		 */
 		t = this_cpu_read(pcp->stat_threshold);
 
@@ -945,7 +948,16 @@ static const char * const vmstat_text[] = {
 	"unevictable_pgs_cleared",
 	"unevictable_pgs_stranded",
 	"unevictable_pgs_mlockfreed",
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	"thp_fault_alloc",
+	"thp_fault_fallback",
+	"thp_collapse_alloc",
+	"thp_collapse_alloc_failed",
+	"thp_split",
 #endif
+
+#endif /* CONFIG_VM_EVENTS_COUNTERS */
 };
 
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
diff --git a/net/9p/client.c b/net/9p/client.c
index 0ce959218607..ceab943dfc49 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -614,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
-		if (err != -ERESTARTSYS)
+		if (err != -ERESTARTSYS && err != -EFAULT)
 			c->status = Disconnected;
 		goto reterr;
 	}
@@ -929,15 +929,15 @@ error:
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
-	int clone)
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+		char **wnames, int clone)
 {
 	int err;
 	struct p9_client *clnt;
 	struct p9_fid *fid;
 	struct p9_qid *wqids;
 	struct p9_req_t *req;
-	int16_t nwqids, count;
+	uint16_t nwqids, count;
 
 	err = 0;
 	wqids = NULL;
@@ -955,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 		fid = oldfid;
 
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n",
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
 		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
 
 	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
@@ -1220,27 +1220,6 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fsync);
 
-int p9_client_sync_fs(struct p9_fid *fid)
-{
-	int err = 0;
-	struct p9_req_t *req;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
-
-	clnt = fid->clnt;
-	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto error;
-	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
-	p9_free_req(clnt, req);
-error:
-	return err;
-}
-EXPORT_SYMBOL(p9_client_sync_fs);
-
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 8a4084fa8b5a..a873277cb996 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -265,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'T':{
-				int16_t *nwname = va_arg(ap, int16_t *);
+				uint16_t *nwname = va_arg(ap, uint16_t *);
 				char ***wnames = va_arg(ap, char ***);
 
 				errcode = p9pdu_readf(pdu, proto_version,
@@ -468,7 +468,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 		case 'E':{
 				 int32_t cnt = va_arg(ap, int32_t);
 				 const char *k = va_arg(ap, const void *);
-				 const char *u = va_arg(ap, const void *);
+				 const char __user *u = va_arg(ap,
+							const void __user *);
 				 errcode = p9pdu_writef(pdu, proto_version, "d",
 						 cnt);
 				 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
@@ -495,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'T':{
-				int16_t nwname = va_arg(ap, int);
+				uint16_t nwname = va_arg(ap, int);
 				const char **wnames = va_arg(ap, const char **);
 
 				errcode = p9pdu_writef(pdu, proto_version, "w",
@@ -673,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
 	}
 
 	strcpy(dirent->d_name, nameptr);
+	kfree(nameptr);
 
 out:
 	return fake_pdu.offset;
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index d47880e971dd..9a70ebdec56e 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -63,10 +63,10 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 		int nr_pages, u8 rw)
 {
 	uint32_t first_page_bytes = 0;
-	uint32_t pdata_mapped_pages;
+	int32_t pdata_mapped_pages;
 	struct trans_rpage_info  *rpinfo;
 
-	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+	*pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
 
 	if (*pdata_off)
 		first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
@@ -75,14 +75,9 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 	rpinfo = req->tc->private;
 	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
 			nr_pages, rw, &rpinfo->rp_data[0]);
+	if (pdata_mapped_pages <= 0)
+		return pdata_mapped_pages;
 
-	if (pdata_mapped_pages < 0) {
-		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
-				"nr_pages:%d\n", pdata_mapped_pages,
-				req->tc->pubuf, nr_pages);
-		pdata_mapped_pages = 0;
-		return -EIO;
-	}
 	rpinfo->rp_nr_pages = pdata_mapped_pages;
 	if (*pdata_off) {
 		*pdata_len = first_page_bytes;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e8f046b07182..244e70742183 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -326,8 +326,11 @@ req_retry_pinned:
 			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
 					pdata_off, rpinfo->rp_data, pdata_len);
 		} else {
-			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
-								req->tc->pkbuf;
+			char *pbuf;
+			if (req->tc->pubuf)
+				pbuf = (__force char *) req->tc->pubuf;
+			else
+				pbuf = req->tc->pkbuf;
 			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
 					req->tc->pbuf_size);
 		}
@@ -352,8 +355,12 @@ req_retry_pinned:
 			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
 					pdata_off, rpinfo->rp_data, pdata_len);
 		} else {
-			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
-								req->tc->pkbuf;
+			char *pbuf;
+			if (req->tc->pubuf)
+				pbuf = (__force char *) req->tc->pubuf;
+			else
+				pbuf = req->tc->pkbuf;
+
 			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
 					pbuf, req->tc->pbuf_size);
 		}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 65f39530799d..61605a0f3f39 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -29,18 +29,10 @@
 #include <linux/udp.h>
 #include <linux/if_vlan.h>
 
-static void gw_node_free_rcu(struct rcu_head *rcu)
-{
-	struct gw_node *gw_node;
-
-	gw_node = container_of(rcu, struct gw_node, rcu);
-	kfree(gw_node);
-}
-
 static void gw_node_free_ref(struct gw_node *gw_node)
 {
 	if (atomic_dec_and_test(&gw_node->refcount))
-		call_rcu(&gw_node->rcu, gw_node_free_rcu);
+		kfree_rcu(gw_node, rcu);
 }
 
 static struct gw_node *gw_get_selected_gw_node(struct bat_priv *bat_priv)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 080ec88330a3..40a30bbcd147 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -54,18 +54,10 @@ err:
 	return 0;
 }
 
-static void neigh_node_free_rcu(struct rcu_head *rcu)
-{
-	struct neigh_node *neigh_node;
-
-	neigh_node = container_of(rcu, struct neigh_node, rcu);
-	kfree(neigh_node);
-}
-
 void neigh_node_free_ref(struct neigh_node *neigh_node)
 {
 	if (atomic_dec_and_test(&neigh_node->refcount))
-		call_rcu(&neigh_node->rcu, neigh_node_free_rcu);
+		kfree_rcu(neigh_node, rcu);
 }
 
 /* increases the refcounter of a found router */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c76a33eeb3f1..d5aa60999e83 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -72,18 +72,10 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len)
 	return 0;
 }
 
-static void softif_neigh_free_rcu(struct rcu_head *rcu)
-{
-	struct softif_neigh *softif_neigh;
-
-	softif_neigh = container_of(rcu, struct softif_neigh, rcu);
-	kfree(softif_neigh);
-}
-
 static void softif_neigh_free_ref(struct softif_neigh *softif_neigh)
 {
 	if (atomic_dec_and_test(&softif_neigh->refcount))
-		call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu);
+		kfree_rcu(softif_neigh, rcu);
 }
 
 static void softif_neigh_vid_free_rcu(struct rcu_head *rcu)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 94954c74f6ae..42fdffd1d76c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -369,15 +369,6 @@ static void __sco_sock_close(struct sock *sk)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
-		if (sco_pi(sk)->conn) {
-			sk->sk_state = BT_DISCONN;
-			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
-			hci_conn_put(sco_pi(sk)->conn->hcon);
-			sco_pi(sk)->conn = NULL;
-		} else
-			sco_chan_del(sk, ECONNRESET);
-		break;
-
 	case BT_CONNECT:
 	case BT_DISCONN:
 		sco_chan_del(sk, ECONNRESET);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 05f357828a2f..e15a82ccc05f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2267,6 +2267,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	m->more_to_follow = false;
 	m->pool = NULL;
 
+	/* middle */
+	m->middle = NULL;
+
+	/* data */
+	m->nr_pages = 0;
+	m->page_alignment = 0;
+	m->pages = NULL;
+	m->pagelist = NULL;
+	m->bio = NULL;
+	m->bio_iter = NULL;
+	m->bio_seg = 0;
+	m->trail = NULL;
+
 	/* front */
 	if (front_len) {
 		if (front_len > PAGE_CACHE_SIZE) {
@@ -2286,19 +2299,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	}
 	m->front.iov_len = front_len;
 
-	/* middle */
-	m->middle = NULL;
-
-	/* data */
-	m->nr_pages = 0;
-	m->page_alignment = 0;
-	m->pages = NULL;
-	m->pagelist = NULL;
-	m->bio = NULL;
-	m->bio_iter = NULL;
-	m->bio_seg = 0;
-	m->trail = NULL;
-
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 50af02737a3d..6b5dda1cb5df 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -470,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					 snapc, ops,
 					 use_mempool,
 					 GFP_NOFS, NULL, NULL);
-	if (IS_ERR(req))
-		return req;
+	if (!req)
+		return NULL;
 
 	/* calculate max write size */
 	calc_layout(osdc, vino, layout, off, plen, req, ops);
@@ -579,9 +579,15 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 
 	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
 				 r_linger_osd) {
-		__unregister_linger_request(osdc, req);
+		/*
+		 * reregister request prior to unregistering linger so
+		 * that r_osd is preserved.
+		 */
+		BUG_ON(!list_empty(&req->r_req_lru_item));
 		__register_request(osdc, req);
-		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add(&req->r_osd_item, &req->r_osd->o_requests);
+		__unregister_linger_request(osdc, req);
 		dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
 		     osd->o_osd);
 	}
@@ -798,7 +804,7 @@ static void __register_request(struct ceph_osd_client *osdc,
 	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
 	INIT_LIST_HEAD(&req->r_req_lru_item);
 
-	dout("register_request %p tid %lld\n", req, req->r_tid);
+	dout("__register_request %p tid %lld\n", req, req->r_tid);
 	__insert_request(osdc, req);
 	ceph_osdc_get_request(req);
 	osdc->num_requests++;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 7b39f3ed2fda..e2e66939ed00 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -68,14 +68,6 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
 	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
 }
 
-static void ha_rcu_free(struct rcu_head *head)
-{
-	struct netdev_hw_addr *ha;
-
-	ha = container_of(head, struct netdev_hw_addr, rcu_head);
-	kfree(ha);
-}
-
 static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
 			    unsigned char *addr, int addr_len,
 			    unsigned char addr_type, bool global)
@@ -94,7 +86,7 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
 			if (--ha->refcount)
 				return 0;
 			list_del_rcu(&ha->list);
-			call_rcu(&ha->rcu_head, ha_rcu_free);
+			kfree_rcu(ha, rcu_head);
 			list->count--;
 			return 0;
 		}
@@ -197,7 +189,7 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
 
 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
 		list_del_rcu(&ha->list);
-		call_rcu(&ha->rcu_head, ha_rcu_free);
+		kfree_rcu(ha, rcu_head);
 	}
 	list->count = 0;
 }
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 706502ff64aa..7f36b38e060f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -207,14 +207,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 	rcu_read_unlock();
 }
 
-
-static void free_dm_hw_stat(struct rcu_head *head)
-{
-	struct dm_hw_stat_delta *n;
-	n = container_of(head, struct dm_hw_stat_delta, rcu);
-	kfree(n);
-}
-
 static int set_all_monitor_traces(int state)
 {
 	int rc = 0;
@@ -245,7 +237,7 @@ static int set_all_monitor_traces(int state)
 		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
 			if (new_stat->dev == NULL) {
 				list_del_rcu(&new_stat->list);
-				call_rcu(&new_stat->rcu, free_dm_hw_stat);
+				kfree_rcu(new_stat, rcu);
 			}
 		}
 		break;
@@ -314,7 +306,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 				new_stat->dev = NULL;
 				if (trace_state == TRACE_OFF) {
 					list_del_rcu(&new_stat->list);
-					call_rcu(&new_stat->rcu, free_dm_hw_stat);
+					kfree_rcu(new_stat, rcu);
 					break;
 				}
 			}
diff --git a/net/core/dst.c b/net/core/dst.c
index da47a299618a..81a4fa1c95ed 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -19,6 +19,7 @@
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <linux/sched.h>
+#include <linux/prefetch.h>
 
 #include <net/dst.h>
 
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c2373321b74..43b03dd71e85 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -249,13 +249,6 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 }
 EXPORT_SYMBOL(gen_new_estimator);
 
-static void __gen_kill_estimator(struct rcu_head *head)
-{
-	struct gen_estimator *e = container_of(head,
-					struct gen_estimator, e_rcu);
-	kfree(e);
-}
-
 /**
  * gen_kill_estimator - remove a rate estimator
  * @bstats: basic statistics
@@ -279,7 +272,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 		write_unlock(&est_lock);
 
 		list_del_rcu(&e->list);
-		call_rcu(&e->e_rcu, __gen_kill_estimator);
+		kfree_rcu(e, e_rcu);
 	}
 	spin_unlock_bh(&est_tree_lock);
 }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1b122177c8fa..11b98bc2aa8f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -561,13 +561,6 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
 	return len;
 }
 
-static void rps_map_release(struct rcu_head *rcu)
-{
-	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
-
-	kfree(map);
-}
-
 static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 		      struct rx_queue_attribute *attribute,
 		      const char *buf, size_t len)
@@ -615,7 +608,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	spin_unlock(&rps_map_lock);
 
 	if (old_map)
-		call_rcu(&old_map->rcu, rps_map_release);
+		kfree_rcu(old_map, rcu);
 
 	free_cpumask_var(mask);
 	return len;
@@ -724,7 +717,7 @@ static void rx_queue_release(struct kobject *kobj)
 	map = rcu_dereference_raw(queue->rps_map);
 	if (map) {
 		RCU_INIT_POINTER(queue->rps_map, NULL);
-		call_rcu(&map->rcu, rps_map_release);
+		kfree_rcu(map, rcu);
 	}
 
 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
@@ -894,21 +887,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static void xps_map_release(struct rcu_head *rcu)
-{
-	struct xps_map *map = container_of(rcu, struct xps_map, rcu);
-
-	kfree(map);
-}
-
-static void xps_dev_maps_release(struct rcu_head *rcu)
-{
-	struct xps_dev_maps *dev_maps =
-	    container_of(rcu, struct xps_dev_maps, rcu);
-
-	kfree(dev_maps);
-}
-
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -1005,7 +983,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		map = dev_maps ?
 			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
 		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
-			call_rcu(&map->rcu, xps_map_release);
+			kfree_rcu(map, rcu);
 		if (new_dev_maps->cpu_map[cpu])
 			nonempty = 1;
 	}
@@ -1018,7 +996,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	}
 
 	if (dev_maps)
-		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+		kfree_rcu(dev_maps, rcu);
 
 	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
 					    NUMA_NO_NODE);
@@ -1080,7 +1058,7 @@ static void netdev_queue_release(struct kobject *kobj)
 				else {
 					RCU_INIT_POINTER(dev_maps->cpu_map[i],
 					    NULL);
-					call_rcu(&map->rcu, xps_map_release);
+					kfree_rcu(map, rcu);
 					map = NULL;
 				}
 			}
@@ -1090,7 +1068,7 @@ static void netdev_queue_release(struct kobject *kobj)
 
 		if (!nonempty) {
 			RCU_INIT_POINTER(dev->xps_maps, NULL);
-			call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+			kfree_rcu(dev_maps, rcu);
 		}
 	}
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1abb50841046..2e2dce6583e1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,6 @@ EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
-static void net_generic_release(struct rcu_head *rcu)
-{
-	struct net_generic *ng;
-
-	ng = container_of(rcu, struct net_generic, rcu);
-	kfree(ng);
-}
-
 static int net_assign_generic(struct net *net, int id, void *data)
 {
 	struct net_generic *ng, *old_ng;
@@ -68,7 +60,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
 	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
 
 	rcu_assign_pointer(net->gen, ng);
-	call_rcu(&old_ng->rcu, net_generic_release);
+	kfree_rcu(old_ng, rcu);
 assign:
 	ng->ptr[id - 1] = data;
 	return 0;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 379270f14771..67870e9fd097 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -156,6 +156,7 @@
 #include <linux/wait.h>
 #include <linux/etherdevice.h>
 #include <linux/kthread.h>
+#include <linux/prefetch.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3e934fe96f29..46cbd28f40f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -57,6 +57,7 @@
 #include <linux/init.h>
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
+#include <linux/prefetch.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 404fa1591027..cf26ac74a188 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -332,14 +332,9 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
 	return ifa;
 }
 
-static void dn_dev_free_ifa_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct dn_ifaddr, rcu));
-}
-
 static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
 {
-	call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu);
+	kfree_rcu(ifa, rcu);
 }
 
 static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 641a5a2a9f9c..33e2c35b74b7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -141,18 +141,8 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
 	},
 };
 
-
 /* Release a nexthop info record */
 
-static void free_fib_info_rcu(struct rcu_head *head)
-{
-	struct fib_info *fi = container_of(head, struct fib_info, rcu);
-
-	if (fi->fib_metrics != (u32 *) dst_default_metrics)
-		kfree(fi->fib_metrics);
-	kfree(fi);
-}
-
 void free_fib_info(struct fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
@@ -166,7 +156,7 @@ void free_fib_info(struct fib_info *fi)
 	} endfor_nexthops(fi);
 	fib_info_cnt--;
 	release_net(fi->fib_net);
-	call_rcu(&fi->rcu, free_fib_info_rcu);
+	kfree_rcu(fi, rcu);
 }
 
 void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6375c1c5f642..c779ce96e5b5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -369,14 +369,9 @@ static inline void free_leaf(struct leaf *l)
 	call_rcu_bh(&l->rcu, __leaf_free_rcu);
 }
 
-static void __leaf_info_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct leaf_info, rcu));
-}
-
 static inline void free_leaf_info(struct leaf_info *leaf)
 {
-	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
+	kfree_rcu(leaf, rcu);
 }
 
 static struct tnode *tnode_alloc(size_t size)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index ec03c2fda6ce..672e476c8c8a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,17 +149,11 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
 static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 			 int sfcount, __be32 *psfsrc, int delta);
 
-
-static void ip_mc_list_reclaim(struct rcu_head *head)
-{
-	kfree(container_of(head, struct ip_mc_list, rcu));
-}
-
 static void ip_ma_put(struct ip_mc_list *im)
 {
 	if (atomic_dec_and_test(&im->refcnt)) {
 		in_dev_put(im->interface);
-		call_rcu(&im->rcu, ip_mc_list_reclaim);
+		kfree_rcu(im, rcu);
 	}
 }
 
@@ -1828,12 +1822,6 @@ done:
 }
 EXPORT_SYMBOL(ip_mc_join_group);
 
-static void ip_sf_socklist_reclaim(struct rcu_head *rp)
-{
-	kfree(container_of(rp, struct ip_sf_socklist, rcu));
-	/* sk_omem_alloc should have been decreased by the caller*/
-}
-
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
@@ -1850,18 +1838,10 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 	rcu_assign_pointer(iml->sflist, NULL);
 	/* decrease mem now to avoid the memleak warning */
 	atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
-	call_rcu(&psf->rcu, ip_sf_socklist_reclaim);
+	kfree_rcu(psf, rcu);
 	return err;
 }
 
-
-static void ip_mc_socklist_reclaim(struct rcu_head *rp)
-{
-	kfree(container_of(rp, struct ip_mc_socklist, rcu));
-	/* sk_omem_alloc should have been decreased by the caller*/
-}
-
-
 /*
  *	Ask a socket to leave a group.
  */
@@ -1901,7 +1881,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 		rtnl_unlock();
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
-		call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
+		kfree_rcu(iml, rcu);
 		return 0;
 	}
 	if (!in_dev)
@@ -2018,7 +1998,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 				newpsl->sl_addr[i] = psl->sl_addr[i];
 			/* decrease mem now to avoid the memleak warning */
 			atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
-			call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
+			kfree_rcu(psl, rcu);
 		}
 		rcu_assign_pointer(pmc->sflist, newpsl);
 		psl = newpsl;
@@ -2119,7 +2099,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 			psl->sl_count, psl->sl_addr, 0);
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
-		call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
+		kfree_rcu(psl, rcu);
 	} else
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			0, NULL, 0);
@@ -2316,7 +2296,7 @@ void ip_mc_drop_socket(struct sock *sk)
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
-		call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
+		kfree_rcu(iml, rcu);
 	}
 	rtnl_unlock();
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3cfbbf3387a0..498b927f68be 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -317,12 +317,6 @@ static void snmp6_free_dev(struct inet6_dev *idev)
 
 /* Nobody refers to this device, we may destroy it. */
 
-static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
-{
-	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
-	kfree(idev);
-}
-
 void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
@@ -339,7 +333,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		return;
 	}
 	snmp6_free_dev(idev);
-	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
+	kfree_rcu(idev, rcu);
 }
 
 EXPORT_SYMBOL(in6_dev_finish_destroy);
@@ -535,12 +529,6 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 }
 #endif
 
-static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
-{
-	struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
-	kfree(ifp);
-}
-
 /* Nobody refers to this ifaddr, destroy it */
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
@@ -561,7 +549,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	}
 	dst_release(&ifp->rt->dst);
 
-	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
+	kfree_rcu(ifp, rcu);
 }
 
 static void
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ff62e33ead07..3e6ebcdb4779 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -201,10 +201,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	return 0;
 }
 
-static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
-{
-	kfree(container_of(head, struct ipv6_mc_socklist, rcu));
-}
 /*
  *	socket leave on multicast group
  */
@@ -239,7 +235,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			rcu_read_unlock();
 			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
-			call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+			kfree_rcu(mc_lst, rcu);
 			return 0;
 		}
 	}
@@ -307,7 +303,7 @@ void ipv6_sock_mc_close(struct sock *sk)
 		rcu_read_unlock();
 
 		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
-		call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+		kfree_rcu(mc_lst, rcu);
 
 		spin_lock(&ipv6_sk_mc_lock);
 	}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a6a32b39b607..1cca5761aea9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -396,11 +396,6 @@ out:
 	return err;
 }
 
-static void prl_entry_destroy_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
-}
-
 static void prl_list_destroy_rcu(struct rcu_head *head)
 {
 	struct ip_tunnel_prl_entry *p, *n;
@@ -428,7 +423,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		     p = &x->next) {
 			if (x->addr == a->addr) {
 				*p = x->next;
-				call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
+				kfree_rcu(x, rcu_head);
 				t->prl_count--;
 				goto out;
 			}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index f7fb09ecaf89..b6466e71f5e1 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -687,7 +687,7 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP over IP");
 MODULE_VERSION("1.0");
 
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+/* Use the value of SOCK_DGRAM (2) directly, because __stringify doesn't like
  * enums
  */
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cd5125f77cc5..c8be8eff70da 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -144,14 +144,6 @@ void ieee80211_assign_tid_tx(struct sta_info *sta, int tid,
 	rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx);
 }
 
-static void kfree_tid_tx(struct rcu_head *rcu_head)
-{
-	struct tid_ampdu_tx *tid_tx =
-	    container_of(rcu_head, struct tid_ampdu_tx, rcu_head);
-
-	kfree(tid_tx);
-}
-
 int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				    enum ieee80211_back_parties initiator,
 				    bool tx)
@@ -174,7 +166,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 		/* not even started yet! */
 		ieee80211_assign_tid_tx(sta, tid, NULL);
 		spin_unlock_bh(&sta->lock);
-		call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
+		kfree_rcu(tid_tx, rcu_head);
 		return 0;
 	}
 
@@ -333,7 +325,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 		spin_unlock_bh(&sta->lock);
 
 		ieee80211_wake_queue_agg(local, tid);
-		call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
+		kfree_rcu(tid_tx, rcu_head);
 		return;
 	}
 
@@ -718,7 +710,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 
 	ieee80211_agg_splice_finish(local, tid);
 
-	call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
+	kfree_rcu(tid_tx, rcu_head);
 
  unlock_sta:
 	spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index a94b312dbfac..d2e7f0e86677 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -65,17 +65,9 @@ static void run_again(struct ieee80211_local *local,
 		mod_timer(&local->work_timer, timeout);
 }
 
-static void work_free_rcu(struct rcu_head *head)
-{
-	struct ieee80211_work *wk =
-		container_of(head, struct ieee80211_work, rcu_head);
-
-	kfree(wk);
-}
-
 void free_work(struct ieee80211_work *wk)
 {
-	call_rcu(&wk->rcu_head, work_free_rcu);
+	kfree_rcu(wk, rcu_head);
 }
 
 static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 80a23ed62bb0..05ecdc281a53 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -68,12 +68,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 	return (void *)(*ext) + off;
 }
 
-static void __nf_ct_ext_free_rcu(struct rcu_head *head)
-{
-	struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
-	kfree(ext);
-}
-
 void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
 	struct nf_ct_ext *old, *new;
@@ -114,7 +108,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 					(void *)old + old->offset[i]);
 			rcu_read_unlock();
 		}
-		call_rcu(&old->rcu, __nf_ct_ext_free_rcu);
+		kfree_rcu(old, rcu);
 		ct->ext = new;
 	}
 
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4327e101c047..846f895cb656 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -62,13 +62,6 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
 	[OSF_ATTR_FINGER]	= { .len = sizeof(struct xt_osf_user_finger) },
 };
 
-static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
-{
-	struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head);
-
-	kfree(f);
-}
-
 static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
 			       const struct nlmsghdr *nlh,
 			       const struct nlattr * const osf_attrs[])
@@ -133,7 +126,7 @@ static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
 		 * We are protected by nfnl mutex.
 		 */
 		list_del_rcu(&sf->finger_entry);
-		call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
+		kfree_rcu(sf, rcu_head);
 
 		err = 0;
 		break;
@@ -414,7 +407,7 @@ static void __exit xt_osf_fini(void)
 
 		list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
 			list_del_rcu(&f->finger_entry);
-			call_rcu(&f->rcu_head, xt_osf_finger_free_rcu);
+			kfree_rcu(f, rcu_head);
 		}
 	}
 	rcu_read_unlock();
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e2b0a680dd56..9c38658fba8b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -154,44 +154,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
  */
 
 /**
- * netlbl_unlhsh_free_addr4 - Frees an IPv4 address entry from the hash table
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that memory allocated to a hash table address entry can be
- * released safely.
- *
- */
-static void netlbl_unlhsh_free_addr4(struct rcu_head *entry)
-{
-	struct netlbl_unlhsh_addr4 *ptr;
-
-	ptr = container_of(entry, struct netlbl_unlhsh_addr4, rcu);
-	kfree(ptr);
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-/**
- * netlbl_unlhsh_free_addr6 - Frees an IPv6 address entry from the hash table
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that memory allocated to a hash table address entry can be
- * released safely.
- *
- */
-static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
-{
-	struct netlbl_unlhsh_addr6 *ptr;
-
-	ptr = container_of(entry, struct netlbl_unlhsh_addr6, rcu);
-	kfree(ptr);
-}
-#endif /* IPv6 */
-
-/**
  * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table
  * @entry: the entry's RCU field
  *
@@ -568,7 +530,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 	if (entry == NULL)
 		return -ENOENT;
 
-	call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4);
+	kfree_rcu(entry, rcu);
 	return 0;
 }
 
@@ -629,7 +591,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 	if (entry == NULL)
 		return -ENOENT;
 
-	call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6);
+	kfree_rcu(entry, rcu);
 	return 0;
 }
 #endif /* IPv6 */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c8f35b5d2ee9..5fe4f3b04ed3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1566,12 +1566,6 @@ netlink_kernel_release(struct sock *sk)
 }
 EXPORT_SYMBOL(netlink_kernel_release);
 
-
-static void listeners_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct listeners, rcu));
-}
-
 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
 {
 	struct listeners *new, *old;
@@ -1588,7 +1582,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
 		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
 		rcu_assign_pointer(tbl->listeners, new);
 
-		call_rcu(&old->rcu, listeners_free_rcu);
+		kfree_rcu(old, rcu);
 	}
 	tbl->groups = groups;
 
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 47b3452675b6..d2df8f33160b 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,14 +162,6 @@ int phonet_address_add(struct net_device *dev, u8 addr)
 	return err;
 }
 
-static void phonet_device_rcu_free(struct rcu_head *head)
-{
-	struct phonet_device *pnd;
-
-	pnd = container_of(head, struct phonet_device, rcu);
-	kfree(pnd);
-}
-
 int phonet_address_del(struct net_device *dev, u8 addr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -188,7 +180,7 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 	mutex_unlock(&pndevs->lock);
 
 	if (pnd)
-		call_rcu(&pnd->rcu, phonet_device_rcu_free);
+		kfree_rcu(pnd, rcu);
 
 	return err;
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 14b42f4ad791..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-static void tcf_common_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct tcf_common, tcfc_rcu));
-}
-
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			 * gen_estimator est_timer() might access p->tcfc_lock
 			 * or bstats, wait a RCU grace period before freeing p
 			 */
-			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
+			kfree_rcu(p, tcfc_rcu);
 			return;
 		}
 	}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8a1630774fd6..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -96,11 +96,6 @@ nla_put_failure:
 	goto done;
 }
 
-static void tcf_police_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct tcf_police, tcf_rcu));
-}
-
 static void tcf_police_destroy(struct tcf_police *p)
 {
 	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -121,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
 			 * gen_estimator est_timer() might access p->tcf_lock
 			 * or bstats, wait a RCU grace period before freeing p
 			 */
-			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
+			kfree_rcu(p, tcf_rcu);
 			return;
 		}
 	}
@@ -401,7 +396,6 @@ static void __exit
 police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops);
-	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 }
 
 module_init(police_init_module);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 6150ac5cf5dd..6338413376c8 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -217,7 +217,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
 	}
 
 	if (found) {
-		call_rcu(&addr->rcu, sctp_local_addr_free);
+		kfree_rcu(addr, rcu);
 		SCTP_DBG_OBJCNT_DEC(addr);
 		return 0;
 	}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 500875f4dc41..0bb0d7cb9f10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -130,7 +130,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 		}
 		spin_unlock_bh(&sctp_local_addr_lock);
 		if (found)
-			call_rcu(&addr->rcu, sctp_local_addr_free);
+			kfree_rcu(addr, rcu);
 		break;
 	}
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4de77cb80d88..67380a29e2e9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -230,13 +230,6 @@ static void sctp_free_local_addr_list(void)
 	}
 }
 
-void sctp_local_addr_free(struct rcu_head *head)
-{
-	struct sctp_sockaddr_entry *e = container_of(head,
-				struct sctp_sockaddr_entry, rcu);
-	kfree(e);
-}
-
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
 int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
 			      gfp_t gfp, int copy_flags)
@@ -675,7 +668,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 		}
 		spin_unlock_bh(&sctp_local_addr_lock);
 		if (found)
-			call_rcu(&addr->rcu, sctp_local_addr_free);
+			kfree_rcu(addr, rcu);
 		break;
 	}
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c962c6062aab..e70e5fc87890 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
 	memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
 
 	/* Per TSVWG discussion with Randy. Allow the application to
-	 * resemble a fragmented message.
+	 * reassemble a fragmented message.
 	 */
 	ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
 
diff --git a/net/socket.c b/net/socket.c
index 2d5382d0de54..02dc82db3d23 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -263,15 +263,6 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-
-
-static void wq_free_rcu(struct rcu_head *head)
-{
-	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
-
-	kfree(wq);
-}
-
 static void sock_destroy_inode(struct inode *inode)
 {
 	struct socket_alloc *ei;
@@ -279,7 +270,7 @@ static void sock_destroy_inode(struct inode *inode)
 
 	ei = container_of(inode, struct socket_alloc, vfs_inode);
 	wq = rcu_dereference_protected(ei->socket.wq, 1);
-	call_rcu(&wq->rcu, wq_free_rcu);
+	kfree_rcu(wq, rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 8873fd8ddacd..b2198e65d8bb 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,14 +18,13 @@ config SUNRPC_XPRT_RDMA
 	  If unsure, say N.
 
 config RPCSEC_GSS_KRB5
-	tristate
+	tristate "Secure RPC: Kerberos V mechanism"
 	depends on SUNRPC && CRYPTO
-	prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4)
+	depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
+	depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
+	depends on CRYPTO_ARC4
 	default y
 	select SUNRPC_GSS
-	select CRYPTO_MD5
-	select CRYPTO_DES
-	select CRYPTO_CBC
 	help
 	  Choose Y here to enable Secure RPC using the Kerberos version 5
 	  GSS-API mechanism (RFC 1964).
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f3914d0c5079..339ba64cce1e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -520,7 +520,7 @@ gss_refresh_upcall(struct rpc_task *task)
 		warn_gssd();
 		task->tk_timeout = 15*HZ;
 		rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-		return 0;
+		return -EAGAIN;
 	}
 	if (IS_ERR(gss_msg)) {
 		err = PTR_ERR(gss_msg);
@@ -563,10 +563,12 @@ retry:
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		err = wait_event_interruptible_timeout(pipe_version_waitqueue,
 				pipe_version >= 0, 15*HZ);
+		if (pipe_version < 0) {
+			warn_gssd();
+			err = -EACCES;
+		}
 		if (err)
 			goto out;
-		if (pipe_version < 0)
-			warn_gssd();
 		goto retry;
 	}
 	if (IS_ERR(gss_msg)) {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e7a96e478f63..8d83f9d48713 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task)
 		if (clnt->cl_chatty)
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
-		rpc_exit(task, -EIO);
+		if (task->tk_flags & RPC_TASK_TIMEOUT)
+			rpc_exit(task, -ETIMEDOUT);
+		else
+			rpc_exit(task, -EIO);
 		return;
 	}
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9494c3767356..ce5eb68a9664 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -906,6 +906,7 @@ void xprt_transmit(struct rpc_task *task)
 	}
 
 	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
 	xprt->ops->set_retrans_timeout(task);
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d5f925abe4d2..6165622c3e29 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -244,14 +244,19 @@ endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ifdef BUILD_C_RECORDMCOUNT
+ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
+  RECORDMCOUNT_FLAGS = -w
+endif
 # Due to recursion, we must skip empty.o.
 # The empty.o file is created in the make process in order to determine
 #  the target endianness and word size. It is made before all other C
 #  files, including recordmcount.
 sub_cmd_record_mcount =					\
 	if [ $(@) != "scripts/mod/empty.o" ]; then	\
-		$(objtree)/scripts/recordmcount "$(@)";	\
+		$(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)";	\
 	fi;
+recordmcount_source := $(srctree)/scripts/recordmcount.c \
+		    $(srctree)/scripts/recordmcount.h
 else
 sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
@@ -259,6 +264,7 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
 	"$(LD)" "$(NM)" "$(RM)" "$(MV)" \
 	"$(if $(part-of-module),1,0)" "$(@)";
+recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif
 cmd_record_mcount = 						\
 	if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then	\
@@ -279,13 +285,13 @@ define rule_cc_o_c
 endef
 
 # Built-in and composite module parts
-$(obj)/%.o: $(src)/%.c FORCE
+$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
 	$(call cmd,force_checksrc)
 	$(call if_changed_rule,cc_o_c)
 
 # Single-part modules are special since we need to mark them in $(MODVERDIR)
 
-$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE
+$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
 	$(call cmd,force_checksrc)
 	$(call if_changed_rule,cc_o_c)
 	@{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 659326c3e895..006ad817cd5f 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -332,7 +332,7 @@ static int conf_choice(struct menu *menu)
 		}
 		if (!child)
 			continue;
-		if (line[strlen(line) - 1] == '?') {
+		if (line[0] && line[strlen(line) - 1] == '?') {
 			print_help(child);
 			continue;
 		}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index cd104afcc5f2..413c53693e62 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -420,11 +420,10 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		return 0;
 	}
 
-	if (hdr->e_shnum == 0) {
+	if (hdr->e_shnum == SHN_UNDEF) {
 		/*
 		 * There are more than 64k sections,
 		 * read count from .sh_size.
-		 * note: it doesn't need shndx2secindex()
 		 */
 		info->num_sections = TO_NATIVE(sechdrs[0].sh_size);
 	}
@@ -432,8 +431,7 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		info->num_sections = hdr->e_shnum;
 	}
 	if (hdr->e_shstrndx == SHN_XINDEX) {
-		info->secindex_strings =
-		    shndx2secindex(TO_NATIVE(sechdrs[0].sh_link));
+		info->secindex_strings = TO_NATIVE(sechdrs[0].sh_link);
 	}
 	else {
 		info->secindex_strings = hdr->e_shstrndx;
@@ -489,7 +487,7 @@ static int parse_elf(struct elf_info *info, const char *filename)
 			    sechdrs[i].sh_offset;
 			info->symtab_stop  = (void *)hdr +
 			    sechdrs[i].sh_offset + sechdrs[i].sh_size;
-			sh_link_idx = shndx2secindex(sechdrs[i].sh_link);
+			sh_link_idx = sechdrs[i].sh_link;
 			info->strtab       = (void *)hdr +
 			    sechdrs[sh_link_idx].sh_offset;
 		}
@@ -516,11 +514,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
 
 	if (symtab_shndx_idx != ~0U) {
 		Elf32_Word *p;
-		if (symtab_idx !=
-		    shndx2secindex(sechdrs[symtab_shndx_idx].sh_link))
+		if (symtab_idx != sechdrs[symtab_shndx_idx].sh_link)
 			fatal("%s: SYMTAB_SHNDX has bad sh_link: %u!=%u\n",
-			      filename,
-			      shndx2secindex(sechdrs[symtab_shndx_idx].sh_link),
+			      filename, sechdrs[symtab_shndx_idx].sh_link,
 			      symtab_idx);
 		/* Fix endianness */
 		for (p = info->symtab_shndx_start; p < info->symtab_shndx_stop;
@@ -1446,7 +1442,7 @@ static unsigned int *reloc_location(struct elf_info *elf,
 				    Elf_Shdr *sechdr, Elf_Rela *r)
 {
 	Elf_Shdr *sechdrs = elf->sechdrs;
-	int section = shndx2secindex(sechdr->sh_info);
+	int section = sechdr->sh_info;
 
 	return (void *)elf->hdr + sechdrs[section].sh_offset +
 		r->r_offset;
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 0388cfccac8d..2031119080dc 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -145,33 +145,22 @@ static inline int is_shndx_special(unsigned int i)
 	return i != SHN_XINDEX && i >= SHN_LORESERVE && i <= SHN_HIRESERVE;
 }
 
-/* shndx is in [0..SHN_LORESERVE) U (SHN_HIRESERVE, 0xfffffff], thus:
- * shndx == 0               <=> sechdrs[0]
- * ......
- * shndx == SHN_LORESERVE-1 <=> sechdrs[SHN_LORESERVE-1]
- * shndx == SHN_HIRESERVE+1 <=> sechdrs[SHN_LORESERVE]
- * shndx == SHN_HIRESERVE+2 <=> sechdrs[SHN_LORESERVE+1]
- * ......
- * fyi: sym->st_shndx is uint16, SHN_LORESERVE = ff00, SHN_HIRESERVE = ffff,
- * so basically we map  0000..feff -> 0000..feff
- *                      ff00..ffff -> (you are a bad boy, dont do it)
- *                     10000..xxxx -> ff00..(xxxx-0x100)
+/*
+ * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of
+ * the way to -256..-1, to avoid conflicting with real section
+ * indices.
  */
-static inline unsigned int shndx2secindex(unsigned int i)
-{
-	if (i <= SHN_HIRESERVE)
-		return i;
-	return i - (SHN_HIRESERVE + 1 - SHN_LORESERVE);
-}
+#define SPECIAL(i) ((i) - (SHN_HIRESERVE + 1))
 
 /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
 static inline unsigned int get_secindex(const struct elf_info *info,
 					const Elf_Sym *sym)
 {
+	if (is_shndx_special(sym->st_shndx))
+		return SPECIAL(sym->st_shndx);
 	if (sym->st_shndx != SHN_XINDEX)
 		return sym->st_shndx;
-	return shndx2secindex(info->symtab_shndx_start[sym -
-						       info->symtab_start]);
+	return info->symtab_shndx_start[sym - info->symtab_start];
 }
 
 /* file2alias.c */
diff --git a/scripts/module-common.lds b/scripts/module-common.lds
index 47a1f9ae0ede..0865b3e752be 100644
--- a/scripts/module-common.lds
+++ b/scripts/module-common.lds
@@ -5,4 +5,15 @@
  */
 SECTIONS {
 	/DISCARD/ : { *(.discard) }
+
+	__ksymtab		: { *(SORT(___ksymtab+*)) }
+	__ksymtab_gpl		: { *(SORT(___ksymtab_gpl+*)) }
+	__ksymtab_unused	: { *(SORT(___ksymtab_unused+*)) }
+	__ksymtab_unused_gpl	: { *(SORT(___ksymtab_unused_gpl+*)) }
+	__ksymtab_gpl_future	: { *(SORT(___ksymtab_gpl_future+*)) }
+	__kcrctab		: { *(SORT(___kcrctab+*)) }
+	__kcrctab_gpl		: { *(SORT(___kcrctab_gpl+*)) }
+	__kcrctab_unused	: { *(SORT(___kcrctab_unused+*)) }
+	__kcrctab_unused_gpl	: { *(SORT(___kcrctab_unused_gpl+*)) }
+	__kcrctab_gpl_future	: { *(SORT(___kcrctab_gpl_future+*)) }
 }
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index f9f6f52db772..ee52cb8e17ad 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -24,6 +24,7 @@
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
+#include <getopt.h>
 #include <elf.h>
 #include <fcntl.h>
 #include <setjmp.h>
@@ -39,6 +40,7 @@ static char gpfx;	/* prefix for global symbol name (sometimes '_') */
 static struct stat sb;	/* Remember .st_size, etc. */
 static jmp_buf jmpenv;	/* setjmp/longjmp per-file error escape */
 static const char *altmcount;	/* alternate mcount symbol name */
+static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */
 
 /* setjmp() return values */
 enum {
@@ -78,7 +80,7 @@ static off_t
 ulseek(int const fd, off_t const offset, int const whence)
 {
 	off_t const w = lseek(fd, offset, whence);
-	if ((off_t)-1 == w) {
+	if (w == (off_t)-1) {
 		perror("lseek");
 		fail_file();
 	}
@@ -111,13 +113,41 @@ static void *
 umalloc(size_t size)
 {
 	void *const addr = malloc(size);
-	if (0 == addr) {
+	if (addr == 0) {
 		fprintf(stderr, "malloc failed: %zu bytes\n", size);
 		fail_file();
 	}
 	return addr;
 }
 
+static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+static unsigned char *ideal_nop;
+
+static char rel_type_nop;
+
+static int (*make_nop)(void *map, size_t const offset);
+
+static int make_nop_x86(void *map, size_t const offset)
+{
+	uint32_t *ptr;
+	unsigned char *op;
+
+	/* Confirm we have 0xe8 0x0 0x0 0x0 0x0 */
+	ptr = map + offset;
+	if (*ptr != 0)
+		return -1;
+
+	op = map + offset - 1;
+	if (*op != 0xe8)
+		return -1;
+
+	/* convert to nop */
+	ulseek(fd_map, offset - 1, SEEK_SET);
+	uwrite(fd_map, ideal_nop, 5);
+	return 0;
+}
+
 /*
  * Get the whole file as a programming convenience in order to avoid
  * malloc+lseek+read+free of many pieces.  If successful, then mmap
@@ -136,7 +166,7 @@ static void *mmap_file(char const *fname)
 	void *addr;
 
 	fd_map = open(fname, O_RDWR);
-	if (0 > fd_map || 0 > fstat(fd_map, &sb)) {
+	if (fd_map < 0 || fstat(fd_map, &sb) < 0) {
 		perror(fname);
 		fail_file();
 	}
@@ -147,7 +177,7 @@ static void *mmap_file(char const *fname)
 	addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
 		    fd_map, 0);
 	mmap_failed = 0;
-	if (MAP_FAILED == addr) {
+	if (addr == MAP_FAILED) {
 		mmap_failed = 1;
 		addr = umalloc(sb.st_size);
 		uread(fd_map, addr, sb.st_size);
@@ -206,12 +236,13 @@ static uint32_t (*w2)(uint16_t);
 static int
 is_mcounted_section_name(char const *const txtname)
 {
-	return 0 == strcmp(".text",           txtname) ||
-		0 == strcmp(".ref.text",      txtname) ||
-		0 == strcmp(".sched.text",    txtname) ||
-		0 == strcmp(".spinlock.text", txtname) ||
-		0 == strcmp(".irqentry.text", txtname) ||
-		0 == strcmp(".text.unlikely", txtname);
+	return strcmp(".text",           txtname) == 0 ||
+		strcmp(".ref.text",      txtname) == 0 ||
+		strcmp(".sched.text",    txtname) == 0 ||
+		strcmp(".spinlock.text", txtname) == 0 ||
+		strcmp(".irqentry.text", txtname) == 0 ||
+		strcmp(".kprobes.text", txtname) == 0 ||
+		strcmp(".text.unlikely", txtname) == 0;
 }
 
 /* 32 bit and 64 bit are very similar */
@@ -264,43 +295,48 @@ do_file(char const *const fname)
 	w8 = w8nat;
 	switch (ehdr->e_ident[EI_DATA]) {
 		static unsigned int const endian = 1;
-	default: {
+	default:
 		fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
 			ehdr->e_ident[EI_DATA], fname);
 		fail_file();
-	} break;
-	case ELFDATA2LSB: {
-		if (1 != *(unsigned char const *)&endian) {
+		break;
+	case ELFDATA2LSB:
+		if (*(unsigned char const *)&endian != 1) {
 			/* main() is big endian, file.o is little endian. */
 			w = w4rev;
 			w2 = w2rev;
 			w8 = w8rev;
 		}
-	} break;
-	case ELFDATA2MSB: {
-		if (0 != *(unsigned char const *)&endian) {
+		break;
+	case ELFDATA2MSB:
+		if (*(unsigned char const *)&endian != 0) {
 			/* main() is little endian, file.o is big endian. */
 			w = w4rev;
 			w2 = w2rev;
 			w8 = w8rev;
 		}
-	} break;
+		break;
 	}  /* end switch */
-	if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG)
-	||  ET_REL != w2(ehdr->e_type)
-	||  EV_CURRENT != ehdr->e_ident[EI_VERSION]) {
+	if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0
+	||  w2(ehdr->e_type) != ET_REL
+	||  ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
 		fprintf(stderr, "unrecognized ET_REL file %s\n", fname);
 		fail_file();
 	}
 
 	gpfx = 0;
 	switch (w2(ehdr->e_machine)) {
-	default: {
+	default:
 		fprintf(stderr, "unrecognized e_machine %d %s\n",
 			w2(ehdr->e_machine), fname);
 		fail_file();
-	} break;
-	case EM_386:	 reltype = R_386_32;                   break;
+		break;
+	case EM_386:
+		reltype = R_386_32;
+		make_nop = make_nop_x86;
+		ideal_nop = ideal_nop5_x86_32;
+		mcount_adjust_32 = -1;
+		break;
 	case EM_ARM:	 reltype = R_ARM_ABS32;
 			 altmcount = "__gnu_mcount_nc";
 			 break;
@@ -311,67 +347,91 @@ do_file(char const *const fname)
 	case EM_S390:    /* reltype: e_class    */ gpfx = '_'; break;
 	case EM_SH:	 reltype = R_SH_DIR32;                 break;
 	case EM_SPARCV9: reltype = R_SPARC_64;     gpfx = '_'; break;
-	case EM_X86_64:	 reltype = R_X86_64_64;                break;
+	case EM_X86_64:
+		make_nop = make_nop_x86;
+		ideal_nop = ideal_nop5_x86_64;
+		reltype = R_X86_64_64;
+		mcount_adjust_64 = -1;
+		break;
 	}  /* end switch */
 
 	switch (ehdr->e_ident[EI_CLASS]) {
-	default: {
+	default:
 		fprintf(stderr, "unrecognized ELF class %d %s\n",
 			ehdr->e_ident[EI_CLASS], fname);
 		fail_file();
-	} break;
-	case ELFCLASS32: {
-		if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize)
-		||  sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) {
+		break;
+	case ELFCLASS32:
+		if (w2(ehdr->e_ehsize) != sizeof(Elf32_Ehdr)
+		||  w2(ehdr->e_shentsize) != sizeof(Elf32_Shdr)) {
 			fprintf(stderr,
 				"unrecognized ET_REL file: %s\n", fname);
 			fail_file();
 		}
-		if (EM_S390 == w2(ehdr->e_machine))
+		if (w2(ehdr->e_machine) == EM_S390) {
 			reltype = R_390_32;
-		if (EM_MIPS == w2(ehdr->e_machine)) {
+			mcount_adjust_32 = -4;
+		}
+		if (w2(ehdr->e_machine) == EM_MIPS) {
 			reltype = R_MIPS_32;
 			is_fake_mcount32 = MIPS32_is_fake_mcount;
 		}
 		do32(ehdr, fname, reltype);
-	} break;
+		break;
 	case ELFCLASS64: {
 		Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr;
-		if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize)
-		||  sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) {
+		if (w2(ghdr->e_ehsize) != sizeof(Elf64_Ehdr)
+		||  w2(ghdr->e_shentsize) != sizeof(Elf64_Shdr)) {
 			fprintf(stderr,
 				"unrecognized ET_REL file: %s\n", fname);
 			fail_file();
 		}
-		if (EM_S390 == w2(ghdr->e_machine))
+		if (w2(ghdr->e_machine) == EM_S390) {
 			reltype = R_390_64;
-		if (EM_MIPS == w2(ghdr->e_machine)) {
+			mcount_adjust_64 = -8;
+		}
+		if (w2(ghdr->e_machine) == EM_MIPS) {
 			reltype = R_MIPS_64;
 			Elf64_r_sym = MIPS64_r_sym;
 			Elf64_r_info = MIPS64_r_info;
 			is_fake_mcount64 = MIPS64_is_fake_mcount;
 		}
 		do64(ghdr, fname, reltype);
-	} break;
+		break;
+	}
 	}  /* end switch */
 
 	cleanup();
 }
 
 int
-main(int argc, char const *argv[])
+main(int argc, char *argv[])
 {
 	const char ftrace[] = "/ftrace.o";
 	int ftrace_size = sizeof(ftrace) - 1;
 	int n_error = 0;  /* gcc-4.3.0 false positive complaint */
+	int c;
+	int i;
+
+	while ((c = getopt(argc, argv, "w")) >= 0) {
+		switch (c) {
+		case 'w':
+			warn_on_notrace_sect = 1;
+			break;
+		default:
+			fprintf(stderr, "usage: recordmcount [-w] file.o...\n");
+			return 0;
+		}
+	}
 
-	if (argc <= 1) {
-		fprintf(stderr, "usage: recordmcount file.o...\n");
+	if ((argc - optind) < 1) {
+		fprintf(stderr, "usage: recordmcount [-w] file.o...\n");
 		return 0;
 	}
 
 	/* Process each file in turn, allowing deep failure. */
-	for (--argc, ++argv; 0 < argc; --argc, ++argv) {
+	for (i = optind; i < argc; i++) {
+		char *file = argv[i];
 		int const sjval = setjmp(jmpenv);
 		int len;
 
@@ -380,29 +440,29 @@ main(int argc, char const *argv[])
 		 * function but does not call it. Since ftrace.o should
 		 * not be traced anyway, we just skip it.
 		 */
-		len = strlen(argv[0]);
+		len = strlen(file);
 		if (len >= ftrace_size &&
-		    strcmp(argv[0] + (len - ftrace_size), ftrace) == 0)
+		    strcmp(file + (len - ftrace_size), ftrace) == 0)
 			continue;
 
 		switch (sjval) {
-		default: {
-			fprintf(stderr, "internal error: %s\n", argv[0]);
+		default:
+			fprintf(stderr, "internal error: %s\n", file);
 			exit(1);
-		} break;
-		case SJ_SETJMP: {  /* normal sequence */
+			break;
+		case SJ_SETJMP:    /* normal sequence */
 			/* Avoid problems if early cleanup() */
 			fd_map = -1;
 			ehdr_curr = NULL;
 			mmap_failed = 1;
-			do_file(argv[0]);
-		} break;
-		case SJ_FAIL: {  /* error in do_file or below */
+			do_file(file);
+			break;
+		case SJ_FAIL:    /* error in do_file or below */
 			++n_error;
-		} break;
-		case SJ_SUCCEED: {  /* premature success */
+			break;
+		case SJ_SUCCEED:    /* premature success */
 			/* do nothing */
-		} break;
+			break;
 		}  /* end switch */
 	}
 	return !!n_error;
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index baf187bee983..4be60364a405 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -22,11 +22,15 @@
 #undef is_fake_mcount
 #undef fn_is_fake_mcount
 #undef MIPS_is_fake_mcount
+#undef mcount_adjust
 #undef sift_rel_mcount
+#undef nop_mcount
 #undef find_secsym_ndx
 #undef __has_rel_mcount
 #undef has_rel_mcount
 #undef tot_relsize
+#undef get_mcountsym
+#undef get_sym_str_and_relp
 #undef do_func
 #undef Elf_Addr
 #undef Elf_Ehdr
@@ -49,14 +53,18 @@
 #ifdef RECORD_MCOUNT_64
 # define append_func		append64
 # define sift_rel_mcount	sift64_rel_mcount
+# define nop_mcount		nop_mcount_64
 # define find_secsym_ndx	find64_secsym_ndx
 # define __has_rel_mcount	__has64_rel_mcount
 # define has_rel_mcount		has64_rel_mcount
 # define tot_relsize		tot64_relsize
+# define get_sym_str_and_relp	get_sym_str_and_relp_64
 # define do_func		do64
+# define get_mcountsym		get_mcountsym_64
 # define is_fake_mcount		is_fake_mcount64
 # define fn_is_fake_mcount	fn_is_fake_mcount64
 # define MIPS_is_fake_mcount	MIPS64_is_fake_mcount
+# define mcount_adjust		mcount_adjust_64
 # define Elf_Addr		Elf64_Addr
 # define Elf_Ehdr		Elf64_Ehdr
 # define Elf_Shdr		Elf64_Shdr
@@ -77,14 +85,18 @@
 #else
 # define append_func		append32
 # define sift_rel_mcount	sift32_rel_mcount
+# define nop_mcount		nop_mcount_32
 # define find_secsym_ndx	find32_secsym_ndx
 # define __has_rel_mcount	__has32_rel_mcount
 # define has_rel_mcount		has32_rel_mcount
 # define tot_relsize		tot32_relsize
+# define get_sym_str_and_relp	get_sym_str_and_relp_32
 # define do_func		do32
+# define get_mcountsym		get_mcountsym_32
 # define is_fake_mcount		is_fake_mcount32
 # define fn_is_fake_mcount	fn_is_fake_mcount32
 # define MIPS_is_fake_mcount	MIPS32_is_fake_mcount
+# define mcount_adjust		mcount_adjust_32
 # define Elf_Addr		Elf32_Addr
 # define Elf_Ehdr		Elf32_Ehdr
 # define Elf_Shdr		Elf32_Shdr
@@ -123,6 +135,8 @@ static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type)
 }
 static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO;
 
+static int mcount_adjust = 0;
+
 /*
  * MIPS mcount long call has 2 _mcount symbols, only the position of the 1st
  * _mcount symbol is needed for dynamic function tracer, with it, to disable
@@ -234,6 +248,49 @@ static void append_func(Elf_Ehdr *const ehdr,
 	uwrite(fd_map, ehdr, sizeof(*ehdr));
 }
 
+static unsigned get_mcountsym(Elf_Sym const *const sym0,
+			      Elf_Rel const *relp,
+			      char const *const str0)
+{
+	unsigned mcountsym = 0;
+
+	Elf_Sym const *const symp =
+		&sym0[Elf_r_sym(relp)];
+	char const *symname = &str0[w(symp->st_name)];
+	char const *mcount = gpfx == '_' ? "_mcount" : "mcount";
+
+	if (symname[0] == '.')
+		++symname;  /* ppc64 hack */
+	if (strcmp(mcount, symname) == 0 ||
+	    (altmcount && strcmp(altmcount, symname) == 0))
+		mcountsym = Elf_r_sym(relp);
+
+	return mcountsym;
+}
+
+static void get_sym_str_and_relp(Elf_Shdr const *const relhdr,
+				 Elf_Ehdr const *const ehdr,
+				 Elf_Sym const **sym0,
+				 char const **str0,
+				 Elf_Rel const **relp)
+{
+	Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	unsigned const symsec_sh_link = w(relhdr->sh_link);
+	Elf_Shdr const *const symsec = &shdr0[symsec_sh_link];
+	Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
+	Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset)
+		+ (void *)ehdr);
+
+	*sym0 = (Elf_Sym const *)(_w(symsec->sh_offset)
+				  + (void *)ehdr);
+
+	*str0 = (char const *)(_w(strsec->sh_offset)
+			       + (void *)ehdr);
+
+	*relp = rel0;
+}
+
 /*
  * Look at the relocations in order to find the calls to mcount.
  * Accumulate the section offsets that are found, and their relocation info,
@@ -250,47 +307,27 @@ static uint_t *sift_rel_mcount(uint_t *mlocp,
 {
 	uint_t *const mloc0 = mlocp;
 	Elf_Rel *mrelp = *mrelpp;
-	Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
-		+ (void *)ehdr);
-	unsigned const symsec_sh_link = w(relhdr->sh_link);
-	Elf_Shdr const *const symsec = &shdr0[symsec_sh_link];
-	Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
-	char const *const str0 = (char const *)(_w(strsec->sh_offset)
-		+ (void *)ehdr);
-
-	Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset)
-		+ (void *)ehdr);
+	Elf_Sym const *sym0;
+	char const *str0;
+	Elf_Rel const *relp;
 	unsigned rel_entsize = _w(relhdr->sh_entsize);
 	unsigned const nrel = _w(relhdr->sh_size) / rel_entsize;
-	Elf_Rel const *relp = rel0;
-
 	unsigned mcountsym = 0;
 	unsigned t;
 
+	get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp);
+
 	for (t = nrel; t; --t) {
-		if (!mcountsym) {
-			Elf_Sym const *const symp =
-				&sym0[Elf_r_sym(relp)];
-			char const *symname = &str0[w(symp->st_name)];
-			char const *mcount = '_' == gpfx ? "_mcount" : "mcount";
-
-			if ('.' == symname[0])
-				++symname;  /* ppc64 hack */
-			if (0 == strcmp(mcount, symname) ||
-			    (altmcount && 0 == strcmp(altmcount, symname)))
-				mcountsym = Elf_r_sym(relp);
-		}
+		if (!mcountsym)
+			mcountsym = get_mcountsym(sym0, relp, str0);
 
 		if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) {
-			uint_t const addend = _w(_w(relp->r_offset) - recval);
-
+			uint_t const addend =
+				_w(_w(relp->r_offset) - recval + mcount_adjust);
 			mrelp->r_offset = _w(offbase
 				+ ((void *)mlocp - (void *)mloc0));
 			Elf_r_info(mrelp, recsym, reltype);
-			if (sizeof(Elf_Rela) == rel_entsize) {
+			if (rel_entsize == sizeof(Elf_Rela)) {
 				((Elf_Rela *)mrelp)->r_addend = addend;
 				*mlocp++ = 0;
 			} else
@@ -304,6 +341,63 @@ static uint_t *sift_rel_mcount(uint_t *mlocp,
 	return mlocp;
 }
 
+/*
+ * Read the relocation table again, but this time it's called on sections
+ * that are not going to be traced. The mcount calls here are converted into
+ * nops; offsets read from the ELF image go through _w(), as elsewhere here.
+ */
+static void nop_mcount(Elf_Shdr const *const relhdr,
+		       Elf_Ehdr const *const ehdr,
+		       const char *const txtname)
+{
+	Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
+		+ (void *)ehdr);
+	Elf_Sym const *sym0;
+	char const *str0;
+	Elf_Rel const *relp;
+	Elf_Shdr const *const shdr = &shdr0[w(relhdr->sh_info)];
+	unsigned rel_entsize = _w(relhdr->sh_entsize);
+	unsigned const nrel = _w(relhdr->sh_size) / rel_entsize;
+	unsigned mcountsym = 0;
+	unsigned t;
+	int once = 0;
+
+	get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp);
+
+	for (t = nrel; t; --t) {
+		int ret = -1;
+
+		if (!mcountsym)
+			mcountsym = get_mcountsym(sym0, relp, str0);
+
+		if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) {
+			if (make_nop)
+				ret = make_nop((void *)ehdr,
+					_w(shdr->sh_offset) + _w(relp->r_offset));
+			if (warn_on_notrace_sect && !once) {
+				printf("Section %s has mcount callers being ignored\n",
+				       txtname);
+				once = 1;
+				/* warn-only mode: nothing to patch, so stop here */
+				if (!make_nop)
+					return;
+			}
+		}
+
+		/*
+		 * If we successfully removed the mcount, mark the relocation
+		 * as a nop (don't do anything with it).
+		 */
+		if (!ret) {
+			Elf_Rel rel = *(Elf_Rel *)relp;
+			Elf_r_info(&rel, Elf_r_sym(relp), rel_type_nop);
+			ulseek(fd_map, (void *)relp - (void *)ehdr, SEEK_SET);
+			uwrite(fd_map, &rel, sizeof(rel));
+		}
+		relp = (Elf_Rel const *)(rel_entsize + (void *)relp);
+	}
+}
+
 
 /*
  * Find a symbol in the given section, to be used as the base for relocating
@@ -354,13 +448,13 @@ __has_rel_mcount(Elf_Shdr const *const relhdr,  /* is SHT_REL or SHT_RELA */
 	Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
 	char const *const txtname = &shstrtab[w(txthdr->sh_name)];
 
-	if (0 == strcmp("__mcount_loc", txtname)) {
+	if (strcmp("__mcount_loc", txtname) == 0) {
 		fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
 			fname);
 		succeed_file();
 	}
-	if (SHT_PROGBITS != w(txthdr->sh_type) ||
-	    !is_mcounted_section_name(txtname))
+	if (w(txthdr->sh_type) != SHT_PROGBITS ||
+	    !(w(txthdr->sh_flags) & SHF_EXECINSTR))
 		return NULL;
 	return txtname;
 }
@@ -370,7 +464,7 @@ static char const *has_rel_mcount(Elf_Shdr const *const relhdr,
 				  char const *const shstrtab,
 				  char const *const fname)
 {
-	if (SHT_REL  != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
+	if (w(relhdr->sh_type) != SHT_REL && w(relhdr->sh_type) != SHT_RELA)
 		return NULL;
 	return __has_rel_mcount(relhdr, shdr0, shstrtab, fname);
 }
@@ -383,9 +477,11 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0,
 {
 	unsigned totrelsz = 0;
 	Elf_Shdr const *shdrp = shdr0;
+	char const *txtname;
 
 	for (; nhdr; --nhdr, ++shdrp) {
-		if (has_rel_mcount(shdrp, shdr0, shstrtab, fname))
+		txtname = has_rel_mcount(shdrp, shdr0, shstrtab, fname);
+		if (txtname && is_mcounted_section_name(txtname))
 			totrelsz += _w(shdrp->sh_size);
 	}
 	return totrelsz;
@@ -421,7 +517,7 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
 	for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) {
 		char const *const txtname = has_rel_mcount(relhdr, shdr0,
 			shstrtab, fname);
-		if (txtname) {
+		if (txtname && is_mcounted_section_name(txtname)) {
 			uint_t recval = 0;
 			unsigned const recsym = find_secsym_ndx(
 				w(relhdr->sh_info), txtname, &recval,
@@ -432,6 +528,12 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
 			mlocp = sift_rel_mcount(mlocp,
 				(void *)mlocp - (void *)mloc0, &mrelp,
 				relhdr, ehdr, recsym, recval, reltype);
+		} else if (txtname && (warn_on_notrace_sect || make_nop)) {
+			/*
+			 * This section is ignored by ftrace, but still
+			 * has mcount calls. Convert them to nops now.
+			 */
+			nop_mcount(relhdr, ehdr, txtname);
 		}
 	}
 	if (mloc0 != mlocp) {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 4be0deea71ca..858966ab019c 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -134,6 +134,7 @@ my %text_sections = (
      ".sched.text" => 1,
      ".spinlock.text" => 1,
      ".irqentry.text" => 1,
+     ".kprobes.text" => 1,
      ".text.unlikely" => 1,
 );
 
@@ -222,6 +223,7 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
     $alignment = 8;
+    $mcount_adjust = -1;
 
     # force flags for this arch
     $ld .= " -m elf_x86_64";
@@ -231,6 +233,7 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "i386") {
     $alignment = 4;
+    $mcount_adjust = -1;
 
     # force flags for this arch
     $ld .= " -m elf_i386";
@@ -240,12 +243,14 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "s390" && $bits == 32) {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$";
+    $mcount_adjust = -4;
     $alignment = 4;
     $ld .= " -m elf_s390";
     $cc .= " -m31";
 
 } elsif ($arch eq "s390" && $bits == 64) {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
+    $mcount_adjust = -8;
     $alignment = 8;
     $type = ".quad";
     $ld .= " -m elf64_s390";
diff --git a/security/capability.c b/security/capability.c
index 2984ea4f776f..bbb51156261b 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry,
 	return 0;
 }
 
-static int cap_inode_permission(struct inode *inode, int mask)
+static int cap_inode_permission(struct inode *inode, int mask, unsigned flags)
 {
 	return 0;
 }
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index c6ca8662a468..f66baf44f32d 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -69,18 +69,6 @@ error:
 EXPORT_SYMBOL_GPL(user_instantiate);
 
 /*
- * dispose of the old data from an updated user defined key
- */
-static void user_update_rcu_disposal(struct rcu_head *rcu)
-{
-	struct user_key_payload *upayload;
-
-	upayload = container_of(rcu, struct user_key_payload, rcu);
-
-	kfree(upayload);
-}
-
-/*
  * update a user defined key
  * - the key's semaphore is write-locked
  */
@@ -114,7 +102,7 @@ int user_update(struct key *key, const void *data, size_t datalen)
 		key->expiry = 0;
 	}
 
-	call_rcu(&zap->rcu, user_update_rcu_disposal);
+	kfree_rcu(zap, rcu);
 
 error:
 	return ret;
@@ -145,7 +133,7 @@ void user_revoke(struct key *key)
 
 	if (upayload) {
 		rcu_assign_pointer(key->payload.data, NULL);
-		call_rcu(&upayload->rcu, user_update_rcu_disposal);
+		kfree_rcu(upayload, rcu);
 	}
 }
 
diff --git a/security/security.c b/security/security.c
index 101142369db4..4ba6d4cc061f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -518,16 +518,14 @@ int security_inode_permission(struct inode *inode, int mask)
 {
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
-	return security_ops->inode_permission(inode, mask);
+	return security_ops->inode_permission(inode, mask, 0);
 }
 
 int security_inode_exec_permission(struct inode *inode, unsigned int flags)
 {
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
-	if (flags)
-		return -ECHILD;
-	return security_ops->inode_permission(inode, MAY_EXEC);
+	return security_ops->inode_permission(inode, MAY_EXEC, flags);
 }
 
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 9da6420e2056..3d2715fd35ea 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -38,11 +38,7 @@
 #define AVC_CACHE_RECLAIM		16
 
 #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
-#define avc_cache_stats_incr(field)				\
-do {								\
-	per_cpu(avc_cache_stats, get_cpu()).field++;		\
-	put_cpu();						\
-} while (0)
+#define avc_cache_stats_incr(field)	this_cpu_inc(avc_cache_stats.field)
 #else
 #define avc_cache_stats_incr(field)	do {} while (0)
 #endif
@@ -347,11 +343,10 @@ static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass)
 	node = avc_search_node(ssid, tsid, tclass);
 
 	if (node)
-		avc_cache_stats_incr(hits);
-	else
-		avc_cache_stats_incr(misses);
+		return node;
 
-	return node;
+	avc_cache_stats_incr(misses);
+	return NULL;
 }
 
 static int avc_latest_notif_update(int seqno, int is_insert)
@@ -471,6 +466,7 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
  * @avd: access vector decisions
  * @result: result from avc_has_perm_noaudit
  * @a:  auxiliary audit data
+ * @flags: VFS walk flags
  *
  * Audit the granting or denial of permissions in accordance
  * with the policy.  This function is typically called by
@@ -481,9 +477,10 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
  * be performed under a lock, to allow the lock to be released
  * before calling the auditing code.
  */
-void avc_audit(u32 ssid, u32 tsid,
+int avc_audit(u32 ssid, u32 tsid,
 	       u16 tclass, u32 requested,
-	       struct av_decision *avd, int result, struct common_audit_data *a)
+	       struct av_decision *avd, int result, struct common_audit_data *a,
+	       unsigned flags)
 {
 	struct common_audit_data stack_data;
 	u32 denied, audited;
@@ -515,11 +512,24 @@ void avc_audit(u32 ssid, u32 tsid,
 	else
 		audited = requested & avd->auditallow;
 	if (!audited)
-		return;
+		return 0;
+
 	if (!a) {
 		a = &stack_data;
 		COMMON_AUDIT_DATA_INIT(a, NONE);
 	}
+
+	/*
+	 * When in a RCU walk do the audit on the RCU retry.  This is because
+	 * the collection of the dname in an inode audit message is not RCU
+	 * safe.  Note this may drop some audits when the situation changes
+	 * during retry. However this is logically just as if the operation
+	 * happened a little later.
+	 */
+	if ((a->type == LSM_AUDIT_DATA_FS) &&
+	    (flags & IPERM_FLAG_RCU))
+		return -ECHILD;
+
 	a->selinux_audit_data.tclass = tclass;
 	a->selinux_audit_data.requested = requested;
 	a->selinux_audit_data.ssid = ssid;
@@ -529,6 +539,7 @@ void avc_audit(u32 ssid, u32 tsid,
 	a->lsm_pre_audit = avc_audit_pre_callback;
 	a->lsm_post_audit = avc_audit_post_callback;
 	common_lsm_audit(a);
+	return 0;
 }
 
 /**
@@ -753,7 +764,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 	rcu_read_lock();
 
 	node = avc_lookup(ssid, tsid, tclass);
-	if (!node) {
+	if (unlikely(!node)) {
 		rcu_read_unlock();
 
 		if (in_avd)
@@ -793,6 +804,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * @tclass: target security class
  * @requested: requested permissions, interpreted based on @tclass
  * @auditdata: auxiliary audit data
+ * @flags: VFS walk flags
  *
  * Check the AVC to determine whether the @requested permissions are granted
  * for the SID pair (@ssid, @tsid), interpreting the permissions
@@ -802,14 +814,19 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * permissions are granted, -%EACCES if any permissions are denied, or
  * another -errno upon other errors.
  */
-int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
-		 u32 requested, struct common_audit_data *auditdata)
+int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
+		       u32 requested, struct common_audit_data *auditdata,
+		       unsigned flags)
 {
 	struct av_decision avd;
-	int rc;
+	int rc, rc2;
 
 	rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
-	avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
+
+	rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata,
+			flags);
+	if (rc2)
+		return rc2;
 	return rc;
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f9c3764e4859..8fb248843009 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1446,8 +1446,11 @@ static int task_has_capability(struct task_struct *tsk,
 	}
 
 	rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
-	if (audit == SECURITY_CAP_AUDIT)
-		avc_audit(sid, sid, sclass, av, &avd, rc, &ad);
+	if (audit == SECURITY_CAP_AUDIT) {
+		int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0);
+		if (rc2)
+			return rc2;
+	}
 	return rc;
 }
 
@@ -1467,7 +1470,8 @@ static int task_has_system(struct task_struct *tsk,
 static int inode_has_perm(const struct cred *cred,
 			  struct inode *inode,
 			  u32 perms,
-			  struct common_audit_data *adp)
+			  struct common_audit_data *adp,
+			  unsigned flags)
 {
 	struct inode_security_struct *isec;
 	struct common_audit_data ad;
@@ -1487,7 +1491,7 @@ static int inode_has_perm(const struct cred *cred,
 		ad.u.fs.inode = inode;
 	}
 
-	return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp);
+	return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
 }
 
 /* Same as inode_has_perm, but pass explicit audit data containing
@@ -1504,7 +1508,7 @@ static inline int dentry_has_perm(const struct cred *cred,
 	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.mnt = mnt;
 	ad.u.fs.path.dentry = dentry;
-	return inode_has_perm(cred, inode, av, &ad);
+	return inode_has_perm(cred, inode, av, &ad, 0);
 }
 
 /* Check whether a task can use an open file descriptor to
@@ -1540,7 +1544,7 @@ static int file_has_perm(const struct cred *cred,
 	/* av is zero if only checking access to the descriptor. */
 	rc = 0;
 	if (av)
-		rc = inode_has_perm(cred, inode, av, &ad);
+		rc = inode_has_perm(cred, inode, av, &ad, 0);
 
 out:
 	return rc;
@@ -1574,7 +1578,8 @@ static int may_create(struct inode *dir,
 		return rc;
 
 	if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
-		rc = security_transition_sid(sid, dsec->sid, tclass, NULL, &newsid);
+		rc = security_transition_sid(sid, dsec->sid, tclass,
+					     &dentry->d_name, &newsid);
 		if (rc)
 			return rc;
 	}
@@ -2103,7 +2108,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 			file = file_priv->file;
 			inode = file->f_path.dentry->d_inode;
 			if (inode_has_perm(cred, inode,
-					   FILE__READ | FILE__WRITE, NULL)) {
+					   FILE__READ | FILE__WRITE, NULL, 0)) {
 				drop_tty = 1;
 			}
 		}
@@ -2635,7 +2640,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
 	return dentry_has_perm(cred, NULL, dentry, FILE__READ);
 }
 
-static int selinux_inode_permission(struct inode *inode, int mask)
+static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags)
 {
 	const struct cred *cred = current_cred();
 	struct common_audit_data ad;
@@ -2657,7 +2662,7 @@ static int selinux_inode_permission(struct inode *inode, int mask)
 
 	perms = file_mask_to_av(inode->i_mode, mask);
 
-	return inode_has_perm(cred, inode, perms, &ad);
+	return inode_has_perm(cred, inode, perms, &ad, flags);
 }
 
 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
@@ -3205,7 +3210,7 @@ static int selinux_dentry_open(struct file *file, const struct cred *cred)
 	 * new inode label or new policy.
 	 * This check is not redundant - do not remove.
 	 */
-	return inode_has_perm(cred, inode, open_file_to_av(file), NULL);
+	return inode_has_perm(cred, inode, open_file_to_av(file), NULL, 0);
 }
 
 /* task security operations */
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 5615081b73ec..47fda963495d 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -41,7 +41,6 @@ struct sk_buff;
  */
 struct avc_cache_stats {
 	unsigned int lookups;
-	unsigned int hits;
 	unsigned int misses;
 	unsigned int allocations;
 	unsigned int reclaims;
@@ -54,11 +53,11 @@ struct avc_cache_stats {
 
 void __init avc_init(void);
 
-void avc_audit(u32 ssid, u32 tsid,
+int avc_audit(u32 ssid, u32 tsid,
 	       u16 tclass, u32 requested,
 	       struct av_decision *avd,
 	       int result,
-	       struct common_audit_data *a);
+	      struct common_audit_data *a, unsigned flags);
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
 int avc_has_perm_noaudit(u32 ssid, u32 tsid,
@@ -66,9 +65,17 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 			 unsigned flags,
 			 struct av_decision *avd);
 
-int avc_has_perm(u32 ssid, u32 tsid,
-		 u16 tclass, u32 requested,
-		 struct common_audit_data *auditdata);
+int avc_has_perm_flags(u32 ssid, u32 tsid,
+		       u16 tclass, u32 requested,
+		       struct common_audit_data *auditdata,
+		       unsigned);
+
+/* Flag-less form of avc_has_perm_flags(): always passes 0 for the flags. */
+static inline int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested,
+			       struct common_audit_data *auditdata)
+{
+	return avc_has_perm_flags(ssid, tsid, tclass, requested, auditdata, 0);
+}
 
 u32 avc_policy_seqno(void);
 
diff --git a/security/selinux/netif.c b/security/selinux/netif.c
index d6095d63d831..58cc481c93d5 100644
--- a/security/selinux/netif.c
+++ b/security/selinux/netif.c
@@ -104,22 +104,6 @@ static int sel_netif_insert(struct sel_netif *netif)
 }
 
 /**
- * sel_netif_free - Frees an interface entry
- * @p: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that memory allocated to a hash table interface entry can be
- * released safely.
- *
- */
-static void sel_netif_free(struct rcu_head *p)
-{
-	struct sel_netif *netif = container_of(p, struct sel_netif, rcu_head);
-	kfree(netif);
-}
-
-/**
  * sel_netif_destroy - Remove an interface record from the table
  * @netif: the existing interface record
  *
@@ -131,7 +115,7 @@ static void sel_netif_destroy(struct sel_netif *netif)
 {
 	list_del_rcu(&netif->list);
 	sel_netif_total--;
-	call_rcu(&netif->rcu_head, sel_netif_free);
+	kfree_rcu(netif, rcu_head);
 }
 
 /**
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index ea39cb742ae5..c0e1a0f52462 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1380,10 +1380,14 @@ static int sel_avc_stats_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		seq_printf(seq, "lookups hits misses allocations reclaims "
 			   "frees\n");
-	else
-		seq_printf(seq, "%u %u %u %u %u %u\n", st->lookups,
-			   st->hits, st->misses, st->allocations,
+	else {
+		unsigned int lookups = st->lookups;
+		unsigned int misses = st->misses;
+		unsigned int hits = lookups - misses;
+		seq_printf(seq, "%u %u %u %u %u %u\n", lookups,
+			   hits, misses, st->allocations,
 			   st->reclaims, st->frees);
+	}
 	return 0;
 }
 
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index e7b850ad57ee..7102457661d6 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -502,7 +502,7 @@ static int policydb_index(struct policydb *p)
 		goto out;
 
 	rc = flex_array_prealloc(p->type_val_to_struct_array, 0,
-				 p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO);
+				 p->p_types.nprim, GFP_KERNEL | __GFP_ZERO);
 	if (rc)
 		goto out;
 
@@ -519,7 +519,7 @@ static int policydb_index(struct policydb *p)
 			goto out;
 
 		rc = flex_array_prealloc(p->sym_val_to_name[i],
-					 0, p->symtab[i].nprim - 1,
+					 0, p->symtab[i].nprim,
 					 GFP_KERNEL | __GFP_ZERO);
 		if (rc)
 			goto out;
@@ -1819,8 +1819,6 @@ static int filename_trans_read(struct policydb *p, void *fp)
 		goto out;
 	nel = le32_to_cpu(buf[0]);
 
-	printk(KERN_ERR "%s: nel=%d\n", __func__, nel);
-
 	last = p->filename_trans;
 	while (last && last->next)
 		last = last->next;
@@ -1857,8 +1855,6 @@ static int filename_trans_read(struct policydb *p, void *fp)
 			goto out;
 		name[len] = 0;
 
-		printk(KERN_ERR "%s: ft=%p ft->name=%p ft->name=%s\n", __func__, ft, ft->name, ft->name);
-
 		rc = next_entry(buf, fp, sizeof(u32) * 4);
 		if (rc)
 			goto out;
@@ -2375,7 +2371,7 @@ int policydb_read(struct policydb *p, void *fp)
 		goto bad;
 
 	/* preallocate so we don't have to worry about the put ever failing */
-	rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1,
+	rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim,
 				 GFP_KERNEL | __GFP_ZERO);
 	if (rc)
 		goto bad;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index c6f8fcadae07..400a5d5cde61 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -686,7 +686,7 @@ static int smack_inode_rename(struct inode *old_inode,
  *
  * Returns 0 if access is permitted, -EACCES otherwise
  */
-static int smack_inode_permission(struct inode *inode, int mask)
+static int smack_inode_permission(struct inode *inode, int mask, unsigned flags)
 {
 	struct smk_audit_info ad;
 
@@ -696,6 +696,10 @@ static int smack_inode_permission(struct inode *inode, int mask)
 	 */
 	if (mask == 0)
 		return 0;
+
+	/* May be droppable after audit */
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
 	smk_ad_setfield_u_fs_inode(&ad, inode);
 	return smk_curacc(smk_of_inode(inode), mask, &ad);
diff --git a/sound/aoa/codecs/tas.c b/sound/aoa/codecs/tas.c
index 58804c7acfcf..fd2188c3df2b 100644
--- a/sound/aoa/codecs/tas.c
+++ b/sound/aoa/codecs/tas.c
@@ -170,7 +170,7 @@ static void tas_set_volume(struct tas *tas)
 	/* analysing the volume and mixer tables shows
 	 * that they are similar enough when we shift
 	 * the mixer table down by 4 bits. The error
-	 * is minuscule, in just one item the error
+	 * is minuscule, in just one item the error
 	 * is 1, at a value of 0x07f17b (mixer table
 	 * value is 0x07f17a) */
 	tmp = tas_gaintable[left];
diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c
index 33f0ba5559a7..62e959120c44 100644
--- a/sound/pci/au88x0/au88x0_pcm.c
+++ b/sound/pci/au88x0/au88x0_pcm.c
@@ -44,10 +44,10 @@ static struct snd_pcm_hardware snd_vortex_playback_hw_adb = {
 	.channels_min = 1,
 	.channels_max = 2,
 	.buffer_bytes_max = 0x10000,
-	.period_bytes_min = 0x1,
+	.period_bytes_min = 0x20,
 	.period_bytes_max = 0x1000,
 	.periods_min = 2,
-	.periods_max = 32,
+	.periods_max = 1024,
 };
 
 #ifndef CHIP_AU8820
@@ -140,6 +140,9 @@ static int snd_vortex_pcm_open(struct snd_pcm_substream *substream)
 					SNDRV_PCM_HW_PARAM_PERIOD_BYTES)) < 0)
 		return err;
 
+	snd_pcm_hw_constraint_step(runtime, 0,
+					SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 64);
+
 	if (VORTEX_PCM_TYPE(substream->pcm) != VORTEX_PCM_WT) {
 #ifndef CHIP_AU8820
 		if (VORTEX_PCM_TYPE(substream->pcm) == VORTEX_PCM_A3D) {
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 430f41db6044..759ade12e758 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -937,6 +937,7 @@ void snd_hda_shutup_pins(struct hda_codec *codec)
 }
 EXPORT_SYMBOL_HDA(snd_hda_shutup_pins);
 
+#ifdef SND_HDA_NEEDS_RESUME
 /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */
 static void restore_shutup_pins(struct hda_codec *codec)
 {
@@ -953,6 +954,7 @@ static void restore_shutup_pins(struct hda_codec *codec)
 	}
 	codec->pins_shutup = 0;
 }
+#endif
 
 static void init_hda_cache(struct hda_cache_rec *cache,
 			   unsigned int record_size);
@@ -1329,6 +1331,7 @@ static void purify_inactive_streams(struct hda_codec *codec)
 	}
 }
 
+#ifdef SND_HDA_NEEDS_RESUME
 /* clean up all streams; called from suspend */
 static void hda_cleanup_all_streams(struct hda_codec *codec)
 {
@@ -1340,6 +1343,7 @@ static void hda_cleanup_all_streams(struct hda_codec *codec)
 			really_cleanup_stream(codec, p);
 	}
 }
+#endif
 
 /*
  * amp access functions
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 52928d9a72da..c82979a8cd09 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1704,11 +1704,11 @@ static void alc_apply_fixup(struct hda_codec *codec, int action)
 				   codec->chip_name, fix->type);
 			break;
 		}
-		if (!fix[id].chained)
+		if (!fix->chained)
 			break;
 		if (++depth > 10)
 			break;
-		id = fix[id].chain_id;
+		id = fix->chain_id;
 	}
 }
 
@@ -5645,6 +5645,7 @@ static void fillup_priv_adc_nids(struct hda_codec *codec, hda_nid_t *nids,
 static struct snd_pci_quirk beep_white_list[] = {
 	SND_PCI_QUIRK(0x1043, 0x829f, "ASUS", 1),
 	SND_PCI_QUIRK(0x1043, 0x83ce, "EeePC", 1),
+	SND_PCI_QUIRK(0x1043, 0x831a, "EeePC", 1),
 	SND_PCI_QUIRK(0x8086, 0xd613, "Intel", 1),
 	{}
 };
@@ -9863,6 +9864,7 @@ static struct snd_pci_quirk alc882_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC883_LAPTOP_EAPD),
 	SND_PCI_QUIRK(0x10f1, 0x2350, "TYAN-S2350", ALC888_6ST_DELL),
 	SND_PCI_QUIRK(0x108e, 0x534d, NULL, ALC883_3ST_6ch),
+	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte P35 DS3R", ALC882_6ST_DIG),
 
 	SND_PCI_QUIRK(0x1462, 0x0349, "MSI", ALC883_TARGA_2ch_DIG),
 	SND_PCI_QUIRK(0x1462, 0x040d, "MSI", ALC883_TARGA_2ch_DIG),
@@ -10699,7 +10701,6 @@ enum {
 	PINFIX_LENOVO_Y530,
 	PINFIX_PB_M5210,
 	PINFIX_ACER_ASPIRE_7736,
-	PINFIX_GIGABYTE_880GM,
 };
 
 static const struct alc_fixup alc882_fixups[] = {
@@ -10731,13 +10732,6 @@ static const struct alc_fixup alc882_fixups[] = {
 		.type = ALC_FIXUP_SKU,
 		.v.sku = ALC_FIXUP_SKU_IGNORE,
 	},
-	[PINFIX_GIGABYTE_880GM] = {
-		.type = ALC_FIXUP_PINS,
-		.v.pins = (const struct alc_pincfg[]) {
-			{ 0x14, 0x1114410 }, /* set as speaker */
-			{ }
-		}
-	},
 };
 
 static struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -10745,7 +10739,6 @@ static struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", PINFIX_LENOVO_Y530),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", PINFIX_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK(0x1025, 0x0296, "Acer Aspire 7736z", PINFIX_ACER_ASPIRE_7736),
-	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte", PINFIX_GIGABYTE_880GM),
 	{}
 };
 
@@ -14868,6 +14861,23 @@ static void alc269_fixup_hweq(struct hda_codec *codec,
 	alc_write_coef_idx(codec, 0x1e, coef | 0x80);
 }
 
+static void alc271_fixup_dmic(struct hda_codec *codec,
+			      const struct alc_fixup *fix, int action)
+{
+	static struct hda_verb coef_verbs[] = {	/* NID 0x20: coef 0x0d <- 0x4000 */
+		{0x20, AC_VERB_SET_COEF_INDEX, 0x0d},
+		{0x20, AC_VERB_SET_PROC_COEF, 0x4000},
+		{}
+	};
+	unsigned int pincfg;	/* pin configuration read back for NID 0x12 */
+
+	if (strcmp(codec->chip_name, "ALC271X") != 0)
+		return;		/* any other chip name is left untouched */
+	pincfg = snd_hda_codec_get_pincfg(codec, 0x12);
+	if (get_defcfg_connect(pincfg) == AC_JACK_PORT_FIXED)
+		snd_hda_sequence_write(codec, coef_verbs);
+}
+
 enum {
 	ALC269_FIXUP_SONY_VAIO,
 	ALC275_FIXUP_SONY_VAIO_GPIO2,
@@ -14876,6 +14886,7 @@ enum {
 	ALC269_FIXUP_ASUS_G73JW,
 	ALC269_FIXUP_LENOVO_EAPD,
 	ALC275_FIXUP_SONY_HWEQ,
+	ALC271_FIXUP_DMIC,
 };
 
 static const struct alc_fixup alc269_fixups[] = {
@@ -14929,7 +14940,11 @@ static const struct alc_fixup alc269_fixups[] = {
 		.v.func = alc269_fixup_hweq,
 		.chained = true,
 		.chain_id = ALC275_FIXUP_SONY_VAIO_GPIO2
-	}
+	},
+	[ALC271_FIXUP_DMIC] = {
+		.type = ALC_FIXUP_FUNC,
+		.v.func = alc271_fixup_dmic,
+	},
 };
 
 static struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -14938,6 +14953,7 @@ static struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
 	SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+	SND_PCI_QUIRK_VENDOR(0x1025, "Acer Aspire", ALC271_FIXUP_DMIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -18782,6 +18798,8 @@ static struct snd_pci_quirk alc662_cfg_tbl[] = {
 		      ALC662_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x1179, 0xff6e, "Toshiba NB20x", ALC662_AUTO),
 	SND_PCI_QUIRK(0x144d, 0xca00, "Samsung NC10", ALC272_SAMSUNG_NC10),
+	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte 945GCM-S2L",
+		      ALC662_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x152d, 0x2304, "Quanta WH1", ALC663_ASUS_H13),
 	SND_PCI_QUIRK(0x1565, 0x820f, "Biostar TA780G M2+", ALC662_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x1631, 0xc10c, "PB RS65", ALC663_ASUS_M51VA),
@@ -19455,7 +19473,7 @@ enum {
 	ALC662_FIXUP_IDEAPAD,
 	ALC272_FIXUP_MARIO,
 	ALC662_FIXUP_CZC_P10T,
-	ALC662_FIXUP_GIGABYTE,
+	ALC662_FIXUP_SKU_IGNORE,
 };
 
 static const struct alc_fixup alc662_fixups[] = {
@@ -19484,20 +19502,17 @@ static const struct alc_fixup alc662_fixups[] = {
 			{}
 		}
 	},
-	[ALC662_FIXUP_GIGABYTE] = {
-		.type = ALC_FIXUP_PINS,
-		.v.pins = (const struct alc_pincfg[]) {
-			{ 0x14, 0x1114410 }, /* set as speaker */
-			{ }
-		}
+	[ALC662_FIXUP_SKU_IGNORE] = {
+		.type = ALC_FIXUP_SKU,
+		.v.sku = ALC_FIXUP_SKU_IGNORE,
 	},
 };
 
 static struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE),
+	SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE),
 	SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
-	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte", ALC662_FIXUP_GIGABYTE),
 	SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
 	SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
 	SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T),
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 1371b57c11e8..0997031c48d2 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1292,14 +1292,18 @@ static void notify_aa_path_ctls(struct hda_codec *codec)
 {
 	int i;
 	struct snd_ctl_elem_id id;
-	const char *labels[] = {"Mic", "Front Mic", "Line"};
+	const char *labels[] = {"Mic", "Front Mic", "Line", "Rear Mic"};
+	struct snd_kcontrol *ctl;
 
 	memset(&id, 0, sizeof(id));
 	id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
 	for (i = 0; i < ARRAY_SIZE(labels); i++) {
 		sprintf(id.name, "%s Playback Volume", labels[i]);
-		snd_ctl_notify(codec->bus->card, SNDRV_CTL_EVENT_MASK_VALUE,
-			       &id);
+		ctl = snd_hda_find_mixer_ctl(codec, id.name);
+		if (ctl)
+			snd_ctl_notify(codec->bus->card,
+					SNDRV_CTL_EVENT_MASK_VALUE,
+					&ctl->id);
 	}
 }
 
diff --git a/sound/soc/codecs/jz4740.c b/sound/soc/codecs/jz4740.c
index f7cd346fd727..f5ccdbf7ebc6 100644
--- a/sound/soc/codecs/jz4740.c
+++ b/sound/soc/codecs/jz4740.c
@@ -308,8 +308,6 @@ static int jz4740_codec_dev_probe(struct snd_soc_codec *codec)
 	snd_soc_dapm_add_routes(dapm, jz4740_codec_dapm_routes,
 		ARRAY_SIZE(jz4740_codec_dapm_routes));
 
-	snd_soc_dapm_new_widgets(codec);
-
 	jz4740_codec_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
 	return 0;
diff --git a/sound/soc/codecs/sn95031.c b/sound/soc/codecs/sn95031.c
index a54d2a5b28f6..4d9fb279e146 100644
--- a/sound/soc/codecs/sn95031.c
+++ b/sound/soc/codecs/sn95031.c
@@ -927,7 +927,7 @@ static struct platform_driver sn95031_codec_driver = {
 		.owner		= THIS_MODULE,
 	},
 	.probe		= sn95031_device_probe,
-	.remove		= sn95031_device_remove,
+	.remove		= __devexit_p(sn95031_device_remove),
 };
 
 static int __init sn95031_init(void)
diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 2727befd158e..b04d28039c16 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -139,7 +139,7 @@ SOC_DOUBLE_R("Capture Volume", SSM2602_LINVOL, SSM2602_RINVOL, 0, 31, 0),
 SOC_DOUBLE_R("Capture Switch", SSM2602_LINVOL, SSM2602_RINVOL, 7, 1, 1),
 
 SOC_SINGLE("Mic Boost (+20dB)", SSM2602_APANA, 0, 1, 0),
-SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 7, 1, 0),
+SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 8, 1, 0),
 SOC_SINGLE("Mic Switch", SSM2602_APANA, 1, 1, 1),
 
 SOC_SINGLE("Sidetone Playback Volume", SSM2602_APANA, 6, 3, 1),
@@ -602,7 +602,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = {
 	.read = ssm2602_read_reg_cache,
 	.write = ssm2602_write,
 	.set_bias_level = ssm2602_set_bias_level,
-	.reg_cache_size = sizeof(ssm2602_reg),
+	.reg_cache_size = ARRAY_SIZE(ssm2602_reg),
 	.reg_word_size = sizeof(u16),
 	.reg_cache_default = ssm2602_reg,
 };
@@ -614,7 +614,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = {
  *    low  = 0x1a
  *    high = 0x1b
  */
-static int ssm2602_i2c_probe(struct i2c_client *i2c,
+static int __devinit ssm2602_i2c_probe(struct i2c_client *i2c,
 			     const struct i2c_device_id *id)
 {
 	struct ssm2602_priv *ssm2602;
@@ -635,7 +635,7 @@ static int ssm2602_i2c_probe(struct i2c_client *i2c,
 	return ret;
 }
 
-static int ssm2602_i2c_remove(struct i2c_client *client)
+static int __devexit ssm2602_i2c_remove(struct i2c_client *client)
 {
 	snd_soc_unregister_codec(&client->dev);
 	kfree(i2c_get_clientdata(client));
@@ -655,7 +655,7 @@ static struct i2c_driver ssm2602_i2c_driver = {
 		.owner = THIS_MODULE,
 	},
 	.probe = ssm2602_i2c_probe,
-	.remove = ssm2602_i2c_remove,
+	.remove = __devexit_p(ssm2602_i2c_remove),
 	.id_table = ssm2602_i2c_id,
 };
 #endif
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index 48ffd406a71d..a7b8f301bad3 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -601,9 +601,7 @@ static struct snd_soc_codec_driver soc_codec_dev_uda134x = {
 	.reg_cache_step = 1,
 	.read = uda134x_read_reg_cache,
 	.write = uda134x_write,
-#ifdef POWER_OFF_ON_STANDBY
 	.set_bias_level = uda134x_set_bias_level,
-#endif
 };
 
 static int __devinit uda134x_codec_probe(struct platform_device *pdev)
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index ae1cadfae84c..824d1c8c8a35 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -247,8 +247,6 @@ static int wm8903_volatile_register(struct snd_soc_codec *codec, unsigned int re
 	case WM8903_REVISION_NUMBER:
 	case WM8903_INTERRUPT_STATUS_1:
 	case WM8903_WRITE_SEQUENCER_4:
-	case WM8903_POWER_MANAGEMENT_3:
-	case WM8903_POWER_MANAGEMENT_2:
 	case WM8903_DC_SERVO_READBACK_1:
 	case WM8903_DC_SERVO_READBACK_2:
 	case WM8903_DC_SERVO_READBACK_3:
@@ -694,7 +692,7 @@ SOC_ENUM("DRC Smoothing Threshold", drc_smoothing),
 SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup),
 
 SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT,
-		 WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 96, 0, digital_tlv),
+		 WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv),
 SOC_ENUM("ADC Companding Mode", adc_companding),
 SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0),
 
@@ -875,34 +873,40 @@ SND_SOC_DAPM_MIXER("Left Speaker Mixer", WM8903_POWER_MANAGEMENT_4, 1, 0,
 SND_SOC_DAPM_MIXER("Right Speaker Mixer", WM8903_POWER_MANAGEMENT_4, 0, 0,
 		   right_speaker_mixer, ARRAY_SIZE(right_speaker_mixer)),
 
-SND_SOC_DAPM_PGA_S("Left Headphone Output PGA", 0, WM8903_ANALOGUE_HP_0,
-		   4, 0, NULL, 0),
-SND_SOC_DAPM_PGA_S("Right Headphone Output PGA", 0, WM8903_ANALOGUE_HP_0,
+SND_SOC_DAPM_PGA_S("Left Headphone Output PGA", 0, WM8903_POWER_MANAGEMENT_2,
+		   1, 0, NULL, 0),
+SND_SOC_DAPM_PGA_S("Right Headphone Output PGA", 0, WM8903_POWER_MANAGEMENT_2,
 		   0, 0, NULL, 0),
 
-SND_SOC_DAPM_PGA_S("Left Line Output PGA", 0, WM8903_ANALOGUE_LINEOUT_0, 4, 0,
+SND_SOC_DAPM_PGA_S("Left Line Output PGA", 0, WM8903_POWER_MANAGEMENT_3, 1, 0,
 		   NULL, 0),
-SND_SOC_DAPM_PGA_S("Right Line Output PGA", 0, WM8903_ANALOGUE_LINEOUT_0, 0, 0,
+SND_SOC_DAPM_PGA_S("Right Line Output PGA", 0, WM8903_POWER_MANAGEMENT_3, 0, 0,
 		   NULL, 0),
 
 SND_SOC_DAPM_PGA_S("HPL_RMV_SHORT", 4, WM8903_ANALOGUE_HP_0, 7, 0, NULL, 0),
 SND_SOC_DAPM_PGA_S("HPL_ENA_OUTP", 3, WM8903_ANALOGUE_HP_0, 6, 0, NULL, 0),
-SND_SOC_DAPM_PGA_S("HPL_ENA_DLY", 1, WM8903_ANALOGUE_HP_0, 5, 0, NULL, 0),
+SND_SOC_DAPM_PGA_S("HPL_ENA_DLY", 2, WM8903_ANALOGUE_HP_0, 5, 0, NULL, 0),
+SND_SOC_DAPM_PGA_S("HPL_ENA", 1, WM8903_ANALOGUE_HP_0, 4, 0, NULL, 0),
 SND_SOC_DAPM_PGA_S("HPR_RMV_SHORT", 4, WM8903_ANALOGUE_HP_0, 3, 0, NULL, 0),
 SND_SOC_DAPM_PGA_S("HPR_ENA_OUTP", 3, WM8903_ANALOGUE_HP_0, 2, 0, NULL, 0),
-SND_SOC_DAPM_PGA_S("HPR_ENA_DLY", 1, WM8903_ANALOGUE_HP_0, 1, 0, NULL, 0),
+SND_SOC_DAPM_PGA_S("HPR_ENA_DLY", 2, WM8903_ANALOGUE_HP_0, 1, 0, NULL, 0),
+SND_SOC_DAPM_PGA_S("HPR_ENA", 1, WM8903_ANALOGUE_HP_0, 0, 0, NULL, 0),
 
 SND_SOC_DAPM_PGA_S("LINEOUTL_RMV_SHORT", 4, WM8903_ANALOGUE_LINEOUT_0, 7, 0,
 		   NULL, 0),
 SND_SOC_DAPM_PGA_S("LINEOUTL_ENA_OUTP", 3, WM8903_ANALOGUE_LINEOUT_0, 6, 0,
 		   NULL, 0),
-SND_SOC_DAPM_PGA_S("LINEOUTL_ENA_DLY", 1, WM8903_ANALOGUE_LINEOUT_0, 5, 0,
+SND_SOC_DAPM_PGA_S("LINEOUTL_ENA_DLY", 2, WM8903_ANALOGUE_LINEOUT_0, 5, 0,
+		   NULL, 0),
+SND_SOC_DAPM_PGA_S("LINEOUTL_ENA", 1, WM8903_ANALOGUE_LINEOUT_0, 4, 0,
 		   NULL, 0),
 SND_SOC_DAPM_PGA_S("LINEOUTR_RMV_SHORT", 4, WM8903_ANALOGUE_LINEOUT_0, 3, 0,
 		   NULL, 0),
 SND_SOC_DAPM_PGA_S("LINEOUTR_ENA_OUTP", 3, WM8903_ANALOGUE_LINEOUT_0, 2, 0,
 		   NULL, 0),
-SND_SOC_DAPM_PGA_S("LINEOUTR_ENA_DLY", 1, WM8903_ANALOGUE_LINEOUT_0, 1, 0,
+SND_SOC_DAPM_PGA_S("LINEOUTR_ENA_DLY", 2, WM8903_ANALOGUE_LINEOUT_0, 1, 0,
+		   NULL, 0),
+SND_SOC_DAPM_PGA_S("LINEOUTR_ENA", 1, WM8903_ANALOGUE_LINEOUT_0, 0, 0,
 		   NULL, 0),
 
 SND_SOC_DAPM_SUPPLY("DCS Master", WM8903_DC_SERVO_0, 4, 0, NULL, 0),
@@ -1037,10 +1041,14 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{ "Left Speaker PGA", NULL, "Left Speaker Mixer" },
 	{ "Right Speaker PGA", NULL, "Right Speaker Mixer" },
 
-	{ "HPL_ENA_DLY", NULL, "Left Headphone Output PGA" },
-	{ "HPR_ENA_DLY", NULL, "Right Headphone Output PGA" },
-	{ "LINEOUTL_ENA_DLY", NULL, "Left Line Output PGA" },
-	{ "LINEOUTR_ENA_DLY", NULL, "Right Line Output PGA" },
+	{ "HPL_ENA", NULL, "Left Headphone Output PGA" },
+	{ "HPR_ENA", NULL, "Right Headphone Output PGA" },
+	{ "HPL_ENA_DLY", NULL, "HPL_ENA" },
+	{ "HPR_ENA_DLY", NULL, "HPR_ENA" },
+	{ "LINEOUTL_ENA", NULL, "Left Line Output PGA" },
+	{ "LINEOUTR_ENA", NULL, "Right Line Output PGA" },
+	{ "LINEOUTL_ENA_DLY", NULL, "LINEOUTL_ENA" },
+	{ "LINEOUTR_ENA_DLY", NULL, "LINEOUTR_ENA" },
 
 	{ "HPL_DCS", NULL, "DCS Master" },
 	{ "HPR_DCS", NULL, "DCS Master" },
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 3290333b2bb9..84e1bd1d2822 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -3261,20 +3261,36 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	wm8994_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
 	/* Latch volume updates (right only; we always do left then right). */
+	snd_soc_update_bits(codec, WM8994_AIF1_DAC1_LEFT_VOLUME,
+			    WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU);
 	snd_soc_update_bits(codec, WM8994_AIF1_DAC1_RIGHT_VOLUME,
 			    WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU);
+	snd_soc_update_bits(codec, WM8994_AIF1_DAC2_LEFT_VOLUME,
+			    WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU);
 	snd_soc_update_bits(codec, WM8994_AIF1_DAC2_RIGHT_VOLUME,
 			    WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU);
+	snd_soc_update_bits(codec, WM8994_AIF2_DAC_LEFT_VOLUME,
+			    WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU);
 	snd_soc_update_bits(codec, WM8994_AIF2_DAC_RIGHT_VOLUME,
 			    WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU);
+	snd_soc_update_bits(codec, WM8994_AIF1_ADC1_LEFT_VOLUME,
+			    WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU);
 	snd_soc_update_bits(codec, WM8994_AIF1_ADC1_RIGHT_VOLUME,
 			    WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU);
+	snd_soc_update_bits(codec, WM8994_AIF1_ADC2_LEFT_VOLUME,
+			    WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU);
 	snd_soc_update_bits(codec, WM8994_AIF1_ADC2_RIGHT_VOLUME,
 			    WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU);
+	snd_soc_update_bits(codec, WM8994_AIF2_ADC_LEFT_VOLUME,
+			    WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU);
 	snd_soc_update_bits(codec, WM8994_AIF2_ADC_RIGHT_VOLUME,
 			    WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU);
+	snd_soc_update_bits(codec, WM8994_DAC1_LEFT_VOLUME,
+			    WM8994_DAC1_VU, WM8994_DAC1_VU);
 	snd_soc_update_bits(codec, WM8994_DAC1_RIGHT_VOLUME,
 			    WM8994_DAC1_VU, WM8994_DAC1_VU);
+	snd_soc_update_bits(codec, WM8994_DAC2_LEFT_VOLUME,
+			    WM8994_DAC2_VU, WM8994_DAC2_VU);
 	snd_soc_update_bits(codec, WM8994_DAC2_RIGHT_VOLUME,
 			    WM8994_DAC2_VU, WM8994_DAC2_VU);
 
diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c
index 7b6b3c18e299..4005e9af5d61 100644
--- a/sound/soc/codecs/wm_hubs.c
+++ b/sound/soc/codecs/wm_hubs.c
@@ -740,12 +740,12 @@ static const struct snd_soc_dapm_route analogue_routes[] = {
 
 	{ "SPKL", "Input Switch", "MIXINL" },
 	{ "SPKL", "IN1LP Switch", "IN1LP" },
-	{ "SPKL", "Output Switch", "Left Output Mixer" },
+	{ "SPKL", "Output Switch", "Left Output PGA" },
 	{ "SPKL", NULL, "TOCLK" },
 
 	{ "SPKR", "Input Switch", "MIXINR" },
 	{ "SPKR", "IN1RP Switch", "IN1RP" },
-	{ "SPKR", "Output Switch", "Right Output Mixer" },
+	{ "SPKR", "Output Switch", "Right Output PGA" },
 	{ "SPKR", NULL, "TOCLK" },
 
 	{ "SPKL Boost", "Direct Voice Switch", "Direct Voice" },
@@ -767,8 +767,8 @@ static const struct snd_soc_dapm_route analogue_routes[] = {
 	{ "SPKOUTRP", NULL, "SPKR Driver" },
 	{ "SPKOUTRN", NULL, "SPKR Driver" },
 
-	{ "Left Headphone Mux", "Mixer", "Left Output Mixer" },
-	{ "Right Headphone Mux", "Mixer", "Right Output Mixer" },
+	{ "Left Headphone Mux", "Mixer", "Left Output PGA" },
+	{ "Right Headphone Mux", "Mixer", "Right Output PGA" },
 
 	{ "Headphone PGA", NULL, "Left Headphone Mux" },
 	{ "Headphone PGA", NULL, "Right Headphone Mux" },
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index a5af834c8ef5..4ddc6d3b6678 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -434,17 +434,21 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		mcasp_set_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE);
 		mcasp_set_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE);
 
-		mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, (0x7 << 26));
+		mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG,
+				ACLKX | AHCLKX | AFSX);
 		break;
 	case SND_SOC_DAIFMT_CBM_CFS:
 		/* codec is clock master and frame slave */
-		mcasp_set_bits(base + DAVINCI_MCASP_ACLKXCTL_REG, ACLKXE);
+		mcasp_clr_bits(base + DAVINCI_MCASP_ACLKXCTL_REG, ACLKXE);
 		mcasp_set_bits(base + DAVINCI_MCASP_TXFMCTL_REG, AFSXE);
 
-		mcasp_set_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE);
+		mcasp_clr_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE);
 		mcasp_set_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE);
 
-		mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG, (0x2d << 26));
+		mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG,
+				ACLKX | ACLKR);
+		mcasp_set_bits(base + DAVINCI_MCASP_PDIR_REG,
+				AFSX | AFSR);
 		break;
 	case SND_SOC_DAIFMT_CBM_CFM:
 		/* codec is clock and frame master */
@@ -454,7 +458,8 @@ static int davinci_mcasp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		mcasp_clr_bits(base + DAVINCI_MCASP_ACLKRCTL_REG, ACLKRE);
 		mcasp_clr_bits(base + DAVINCI_MCASP_RXFMCTL_REG, AFSRE);
 
-		mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG, (0x3f << 26));
+		mcasp_clr_bits(base + DAVINCI_MCASP_PDIR_REG,
+				ACLKX | AHCLKX | AFSX | ACLKR | AHCLKR | AFSR);
 		break;
 
 	default:
@@ -644,7 +649,7 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream)
 		mcasp_set_reg(dev->base + DAVINCI_MCASP_TXTDM_REG, mask);
 		mcasp_set_bits(dev->base + DAVINCI_MCASP_TXFMT_REG, TXORD);
 
-		if ((dev->tdm_slots >= 2) || (dev->tdm_slots <= 32))
+		if ((dev->tdm_slots >= 2) && (dev->tdm_slots <= 32))
 			mcasp_mod_bits(dev->base + DAVINCI_MCASP_TXFMCTL_REG,
 					FSXMOD(dev->tdm_slots), FSXMOD(0x1FF));
 		else
@@ -660,7 +665,7 @@ static void davinci_hw_param(struct davinci_audio_dev *dev, int stream)
 				AHCLKRE);
 		mcasp_set_reg(dev->base + DAVINCI_MCASP_RXTDM_REG, mask);
 
-		if ((dev->tdm_slots >= 2) || (dev->tdm_slots <= 32))
+		if ((dev->tdm_slots >= 2) && (dev->tdm_slots <= 32))
 			mcasp_mod_bits(dev->base + DAVINCI_MCASP_RXFMCTL_REG,
 					FSRMOD(dev->tdm_slots), FSRMOD(0x1FF));
 		else
diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c
index 419bf4f5534a..cd22a54b2f14 100644
--- a/sound/soc/jz4740/jz4740-i2s.c
+++ b/sound/soc/jz4740/jz4740-i2s.c
@@ -133,7 +133,7 @@ static void jz4740_i2s_shutdown(struct snd_pcm_substream *substream,
 	struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai);
 	uint32_t conf;
 
-	if (!dai->active)
+	if (dai->active)
 		return;
 
 	conf = jz4740_i2s_read(i2s, JZ_REG_AIC_CONF);
diff --git a/sound/soc/mid-x86/sst_platform.c b/sound/soc/mid-x86/sst_platform.c
index b2e9198a983a..6b1f9d3bf34e 100644
--- a/sound/soc/mid-x86/sst_platform.c
+++ b/sound/soc/mid-x86/sst_platform.c
@@ -116,18 +116,20 @@ struct snd_soc_dai_driver sst_platform_dai[] = {
 static inline void sst_set_stream_status(struct sst_runtime_stream *stream,
 					int state)
 {
-	spin_lock(&stream->status_lock);
+	unsigned long flags;
+	spin_lock_irqsave(&stream->status_lock, flags);
 	stream->stream_status = state;
-	spin_unlock(&stream->status_lock);
+	spin_unlock_irqrestore(&stream->status_lock, flags);
 }
 
 static inline int sst_get_stream_status(struct sst_runtime_stream *stream)
 {
 	int state;
+	unsigned long flags;
 
-	spin_lock(&stream->status_lock);
+	spin_lock_irqsave(&stream->status_lock, flags);
 	state = stream->stream_status;
-	spin_unlock(&stream->status_lock);
+	spin_unlock_irqrestore(&stream->status_lock, flags);
 	return state;
 }
 
@@ -374,6 +376,11 @@ static int sst_platform_pcm_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
+static int sst_platform_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_pages(substream);
+}
+
 static struct snd_pcm_ops sst_platform_ops = {
 	.open = sst_platform_open,
 	.close = sst_platform_close,
@@ -382,6 +389,7 @@ static struct snd_pcm_ops sst_platform_ops = {
 	.trigger = sst_platform_pcm_trigger,
 	.pointer = sst_platform_pcm_pointer,
 	.hw_params = sst_platform_pcm_hw_params,
+	.hw_free = sst_platform_pcm_hw_free,
 };
 
 static void sst_pcm_free(struct snd_pcm *pcm)
diff --git a/sound/soc/samsung/goni_wm8994.c b/sound/soc/samsung/goni_wm8994.c
index f6b3a3ce5919..0e80daee8b6f 100644
--- a/sound/soc/samsung/goni_wm8994.c
+++ b/sound/soc/samsung/goni_wm8994.c
@@ -236,18 +236,18 @@ static struct snd_soc_dai_link goni_dai[] = {
 	.name = "WM8994",
 	.stream_name = "WM8994 HiFi",
 	.cpu_dai_name = "samsung-i2s.0",
-	.codec_dai_name = "wm8994-hifi",
+	.codec_dai_name = "wm8994-aif1",
 	.platform_name = "samsung-audio",
-	.codec_name = "wm8994-codec.0-0x1a",
+	.codec_name = "wm8994-codec.0-001a",
 	.init = goni_wm8994_init,
 	.ops = &goni_hifi_ops,
 }, {
 	.name = "WM8994 Voice",
 	.stream_name = "Voice",
 	.cpu_dai_name = "goni-voice-dai",
-	.codec_dai_name = "wm8994-voice",
+	.codec_dai_name = "wm8994-aif2",
 	.platform_name = "samsung-audio",
-	.codec_name = "wm8994-codec.0-0x1a",
+	.codec_name = "wm8994-codec.0-001a",
 	.ops = &goni_voice_ops,
 },
 };
diff --git a/sound/soc/samsung/pcm.c b/sound/soc/samsung/pcm.c
index 38aac7d57a59..9c7e8b48aed6 100644
--- a/sound/soc/samsung/pcm.c
+++ b/sound/soc/samsung/pcm.c
@@ -350,8 +350,8 @@ static int s3c_pcm_set_fmt(struct snd_soc_dai *cpu_dai,
 	ctl = readl(regs + S3C_PCM_CTL);
 
 	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
-	case SND_SOC_DAIFMT_NB_NF:
-		/* Nothing to do, NB_NF by default */
+	case SND_SOC_DAIFMT_IB_NF:
+		/* Nothing to do, IB_NF by default */
 		break;
 	default:
 		dev_err(pcm->dev, "Unsupported clock inversion!\n");
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index 0c9997e2d8c0..23c0e83d4c19 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -1200,10 +1200,11 @@ static int fsi_probe(struct platform_device *pdev)
 	master->fsib.master	= master;
 
 	pm_runtime_enable(&pdev->dev);
-	pm_runtime_resume(&pdev->dev);
 	dev_set_drvdata(&pdev->dev, master);
 
+	pm_runtime_get_sync(&pdev->dev);
 	fsi_soft_all_reset(master);
+	pm_runtime_put_sync(&pdev->dev);
 
 	ret = request_irq(irq, &fsi_interrupt, IRQF_DISABLED,
 			  id_entry->name, master);
@@ -1218,8 +1219,17 @@ static int fsi_probe(struct platform_device *pdev)
 		goto exit_free_irq;
 	}
 
-	return snd_soc_register_dais(&pdev->dev, fsi_soc_dai, ARRAY_SIZE(fsi_soc_dai));
+	ret = snd_soc_register_dais(&pdev->dev, fsi_soc_dai,
+				    ARRAY_SIZE(fsi_soc_dai));
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cannot snd dai register\n");
+		goto exit_snd_soc;
+	}
+
+	return ret;
 
+exit_snd_soc:
+	snd_soc_unregister_platform(&pdev->dev);
 exit_free_irq:
 	free_irq(irq, master);
 exit_iounmap:
@@ -1238,12 +1248,11 @@ static int fsi_remove(struct platform_device *pdev)
 
 	master = dev_get_drvdata(&pdev->dev);
 
-	snd_soc_unregister_dais(&pdev->dev, ARRAY_SIZE(fsi_soc_dai));
-	snd_soc_unregister_platform(&pdev->dev);
-
+	free_irq(master->irq, master);
 	pm_runtime_disable(&pdev->dev);
 
-	free_irq(master->irq, master);
+	snd_soc_unregister_dais(&pdev->dev, ARRAY_SIZE(fsi_soc_dai));
+	snd_soc_unregister_platform(&pdev->dev);
 
 	iounmap(master->base);
 	kfree(master);
@@ -1321,3 +1330,4 @@ module_exit(fsi_mobile_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SuperH onchip FSI audio driver");
 MODULE_AUTHOR("Kuninori Morimoto <morimoto.kuninori@renesas.com>");
+MODULE_ALIAS("platform:fsi-pcm-audio");
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index b76b74db0968..dd55d1069468 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -629,6 +629,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 			runtime->hw.rates |= codec_dai_drv->capture.rates;
 	}
 
+	ret = -EINVAL;
 	snd_pcm_limit_hw_rates(runtime);
 	if (!runtime->hw.rates) {
 		printk(KERN_ERR "asoc: %s <-> %s No matching rates\n",
@@ -640,7 +641,8 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 			codec_dai->name, cpu_dai->name);
 		goto config_err;
 	}
-	if (!runtime->hw.channels_min || !runtime->hw.channels_max) {
+	if (!runtime->hw.channels_min || !runtime->hw.channels_max ||
+	    runtime->hw.channels_min > runtime->hw.channels_max) {
 		printk(KERN_ERR "asoc: %s <-> %s No matching channels\n",
 				codec_dai->name, cpu_dai->name);
 		goto config_err;
@@ -2060,6 +2062,7 @@ const struct dev_pm_ops snd_soc_pm_ops = {
 	.resume = snd_soc_resume,
 	.poweroff = snd_soc_poweroff,
 };
+EXPORT_SYMBOL_GPL(snd_soc_pm_ops);
 
 /* ASoC platform driver */
 static struct platform_driver soc_driver = {
@@ -3288,6 +3291,8 @@ int snd_soc_register_card(struct snd_soc_card *card)
 	if (!card->name || !card->dev)
 		return -EINVAL;
 
+	dev_set_drvdata(card->dev, card);
+
 	snd_soc_initialize_card_lists(card);
 
 	soc_init_card_debugfs(card);
diff --git a/sound/soc/tegra/harmony.c b/sound/soc/tegra/harmony.c
index 8585957477eb..556a57133925 100644
--- a/sound/soc/tegra/harmony.c
+++ b/sound/soc/tegra/harmony.c
@@ -370,6 +370,7 @@ static struct platform_driver tegra_snd_harmony_driver = {
 	.driver = {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
+		.pm = &snd_soc_pm_ops,
 	},
 	.probe = tegra_snd_harmony_probe,
 	.remove = __devexit_p(tegra_snd_harmony_remove),
diff --git a/sound/usb/format.c b/sound/usb/format.c
index 5b792d2c8061..f079b5e2ab28 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -176,9 +176,11 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof
 			if (!rate)
 				continue;
 			/* C-Media CM6501 mislabels its 96 kHz altsetting */
+			/* Terratec Aureon 7.1 USB C-Media 6206, too */
 			if (rate == 48000 && nr_rates == 1 &&
 			    (chip->usb_id == USB_ID(0x0d8c, 0x0201) ||
-			     chip->usb_id == USB_ID(0x0d8c, 0x0102)) &&
+			     chip->usb_id == USB_ID(0x0d8c, 0x0102) ||
+			     chip->usb_id == USB_ID(0x0ccd, 0x00b1)) &&
 			    fp->altsetting == 5 && fp->maxpacksize == 392)
 				rate = 96000;
 			/* Creative VF0470 Live Cam reports 16 kHz instead of 8kHz */
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index ec07e62e53f3..1b94ec3a3368 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -533,6 +533,7 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev,
 
 	case USB_ID(0x0d8c, 0x0102):
 		/* C-Media CM6206 / CM106-Like Sound Device */
+	case USB_ID(0x0ccd, 0x00b1): /* Terratec Aureon 7.1 USB */
 		return snd_usb_cm6206_boot_quirk(dev);
 
 	case USB_ID(0x133e, 0x0815):
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index 5bb41e55a3ac..3152cca15501 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -63,7 +63,6 @@ The format file for the sched_wakep event defines the following fields
         field:unsigned char common_flags;
         field:unsigned char common_preempt_count;
         field:int common_pid;
-        field:int common_lock_depth;
 
         field:char comm[TASK_COMM_LEN];
         field:pid_t pid;
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 36b38277422c..471022069119 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -463,7 +463,6 @@ The format file for the sched_wakep event defines the following fields
         field:unsigned char common_flags;
         field:unsigned char common_preempt_count;
         field:int common_pid;
-        field:int common_lock_depth;
 
         field:char comm[TASK_COMM_LEN];
         field:pid_t pid;
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 66f040b30729..86c87e214b11 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -113,13 +113,61 @@ OPTIONS
         Do various checks like samples ordering and lost events.
 
 -f::
---fields
+--fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, sym. Field
-        list must be prepended with the type, trace, sw or hw,
+        list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,sym  and -f trace:time,cpu,trace
 
+		perf script -f <fields>
+
+	is equivalent to:
+
+		perf script -f trace:<fields> -f sw:<fields> -f hw:<fields>
+    
+	i.e., the specified fields apply to all event types if the type string
+	is not given.
+    
+	The arguments are processed in the order received. A later usage can
+	reset a prior request. e.g.:
+    
+		-f trace: -f comm,tid,time,sym
+    
+	The first -f suppresses trace events (field list is ""), but then the
+	second invocation sets the fields to comm,tid,time,sym. In this case a
+	warning is given to the user:
+    
+		"Overriding previous field request for all events."
+    
+	Alternatively, consider the order:
+    
+		-f comm,tid,time,sym -f trace:
+    
+	The first -f sets the fields for all events and the second -f
+	suppresses trace events. The user is given a warning message about
+	the override, and the result of the above is that only S/W and H/W
+	events are displayed with the given fields.
+    
+	For the 'wildcard' option, if a user-selected field is invalid for an
+	event type, a message is displayed to the user that the option is
+	ignored for that type. For example:
+    
+		$ perf script -f comm,tid,trace
+		'trace' not valid for hardware events. Ignoring.
+		'trace' not valid for software events. Ignoring.
+    
+	Alternatively, if the type is given and an invalid field is specified,
+	it is an error. For example:
+    
+        perf script -v -f sw:comm,tid,trace
+        'trace' not valid for software events.
+    
+	At this point usage is displayed, and perf-script exits.
+    
+	Finally, a user may not set fields to none for all event types.
+	i.e., -f "" is not allowed.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 207dee5c5b16..1455413ec7a7 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -5,6 +5,8 @@ endif
 # The default target of this Makefile is...
 all:
 
+include config/utilities.mak
+
 ifneq ($(OUTPUT),)
 # check that the output directory actually exists
 OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
@@ -13,6 +15,12 @@ endif
 
 # Define V to have a more verbose compile.
 #
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
 # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
 #
 # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
@@ -35,15 +43,21 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
 				  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
 				  -e s/sh[234].*/sh/ )
 
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+
 # Additional ARCH settings for x86
 ifeq ($(ARCH),i386)
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
-	RAW_ARCH := x86_64
-        ARCH := x86
-	ARCH_CFLAGS := -DARCH_X86_64
-	ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+	ARCH := x86
+	IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1)
+	ifeq (${IS_X86_64}, 1)
+		RAW_ARCH := x86_64
+		ARCH_CFLAGS := -DARCH_X86_64
+		ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+	endif
 endif
 
 #
@@ -119,8 +133,6 @@ lib = lib
 
 export prefix bindir sharedir sysconfdir
 
-CC = $(CROSS_COMPILE)gcc
-AR = $(CROSS_COMPILE)ar
 RM = rm -f
 MKDIR = mkdir
 FIND = find
@@ -130,7 +142,7 @@ INSTALL = install
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
--include feature-tests.mak
+-include config/feature-tests.mak
 
 ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
 	CFLAGS := $(CFLAGS) -fstack-protector-all
@@ -165,12 +177,10 @@ grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
 $(OUTPUT)python/perf.so: $(PYRF_OBJS)
-	$(QUIET_GEN)( \
-		export CFLAGS="$(BASIC_CFLAGS)"; \
-		python util/setup.py --quiet  build_ext --build-lib='$(OUTPUT)python' \
-			--build-temp='$(OUTPUT)python/temp' \
-	)
-
+	$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
+	  --quiet build_ext \
+	  --build-lib='$(OUTPUT)python' \
+	  --build-temp='$(OUTPUT)python/temp'
 #
 # No Perl scripts right now:
 #
@@ -475,24 +485,74 @@ else
 	endif
 endif
 
-ifdef NO_LIBPYTHON
-	BASIC_CFLAGS += -DNO_LIBPYTHON
+disable-python = $(eval $(disable-python_code))
+define disable-python_code
+  BASIC_CFLAGS += -DNO_LIBPYTHON
+  $(if $(1),$(warning No $(1) was found))
+  $(warning Python support won't be built)
+endef
+
+override PYTHON := \
+  $(call get-executable-or-default,PYTHON,python)
+
+ifndef PYTHON
+  $(call disable-python,python interpreter)
+  python-clean :=
 else
-       PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null)
-       PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
-       PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
-	PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
-	FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
-	ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
-		msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings)
-		BASIC_CFLAGS += -DNO_LIBPYTHON
-	else
-               ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
-               EXTLIBS += $(PYTHON_EMBED_LIBADD)
-		LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
-		LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
-		LANG_BINDINGS += $(OUTPUT)python/perf.so
-	endif
+
+  PYTHON_WORD := $(call shell-wordify,$(PYTHON))
+
+  python-clean := $(PYTHON_WORD) util/setup.py clean \
+    --build-lib='$(OUTPUT)python' \
+    --build-temp='$(OUTPUT)python/temp'
+
+  ifdef NO_LIBPYTHON
+    $(call disable-python)
+  else
+
+    override PYTHON_CONFIG := \
+      $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON)-config)
+
+    ifndef PYTHON_CONFIG
+      $(call disable-python,python-config tool)
+    else
+
+      PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+
+      PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+      PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+      PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
+      PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+      FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+
+      ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
+        $(call disable-python,Python.h (for Python 2.x))
+      else
+
+        ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED)),y)
+          $(warning Python 3 is not yet supported; please set)
+          $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
+          $(warning If you also have Python 2 installed, then)
+          $(warning try something like:)
+          $(warning $(and ,))
+          $(warning $(and ,)  make PYTHON=python2)
+          $(warning $(and ,))
+          $(warning Otherwise, disable Python support entirely:)
+          $(warning $(and ,))
+          $(warning $(and ,)  make NO_LIBPYTHON=1)
+          $(warning $(and ,))
+          $(error   $(and ,))
+        else
+          ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
+          EXTLIBS += $(PYTHON_EMBED_LIBADD)
+          LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
+          LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+          LANG_BINDINGS += $(OUTPUT)python/perf.so
+        endif
+
+      endif
+    endif
+  endif
 endif
 
 ifdef NO_DEMANGLE
@@ -833,8 +893,7 @@ clean:
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
 	$(MAKE) -C Documentation/ clean
 	$(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
-	@python util/setup.py clean --build-lib='$(OUTPUT)python' \
-				   --build-temp='$(OUTPUT)python/temp'
+	$(python-clean)
 
 .PHONY: all install clean strip
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 17d1dcb3c667..0974f957b8fa 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -163,6 +163,7 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
+	attr->inherit		= !no_inherit;
 	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
 				  PERF_FORMAT_TOTAL_TIME_RUNNING |
 				  PERF_FORMAT_ID;
@@ -251,6 +252,9 @@ static void open_counters(struct perf_evlist *evlist)
 {
 	struct perf_evsel *pos;
 
+	if (evlist->cpus->map[0] < 0)
+		no_inherit = true;
+
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct perf_event_attr *attr = &pos->attr;
 		/*
@@ -271,8 +275,7 @@ static void open_counters(struct perf_evlist *evlist)
 retry_sample_id:
 		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 try_again:
-		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
-				     !no_inherit) < 0) {
+		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
 			int err = errno;
 
 			if (err == EPERM || err == EACCES) {
@@ -424,7 +427,7 @@ static void mmap_read_all(void)
 {
 	int i;
 
-	for (i = 0; i < evsel_list->cpus->nr; i++) {
+	for (i = 0; i < evsel_list->nr_mmaps; i++) {
 		if (evsel_list->mmap[i].base)
 			mmap_read(&evsel_list->mmap[i]);
 	}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ac574ea23917..974f6d3f4e53 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -49,57 +49,169 @@ struct output_option {
 };
 
 /* default set to maintain compatibility with current format */
-static u64 output_fields[PERF_TYPE_MAX] = {
-	[PERF_TYPE_HARDWARE] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \
-			       PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \
-			       PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
-
-	[PERF_TYPE_SOFTWARE] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \
-			       PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \
-			       PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
-
-	[PERF_TYPE_TRACEPOINT] = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | \
-				 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | \
-				 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+static struct {
+	bool user_set;		/* a -f option named this type (or all types) */
+	bool wildcard_set;	/* set only by a -f option without a 'type:' prefix */
+	u64 fields;		/* PERF_OUTPUT_* bits printed for this type */
+	u64 invalid_fields;	/* PERF_OUTPUT_* bits -f may not request for it */
+} output[PERF_TYPE_MAX] = {
+	/* defaults; a type with fields == 0 is not printed by process_event() */
+	[PERF_TYPE_HARDWARE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+
+		.invalid_fields = PERF_OUTPUT_TRACE,
+	},
+
+	[PERF_TYPE_SOFTWARE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+
+		.invalid_fields = PERF_OUTPUT_TRACE,
+	},
+
+	[PERF_TYPE_TRACEPOINT] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+				  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+	},
+
+	[PERF_TYPE_RAW] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+
+		.invalid_fields = PERF_OUTPUT_TRACE,
+	},
+};
 
-static bool output_set_by_user;
+/* True once the user has set the output fields of at least one event type. */
+static bool output_set_by_user(void)
+{
+	int type;
+	for (type = 0; type < PERF_TYPE_MAX; type++)
+		if (output[type].user_set)
+			return true;
+	return false;
+}
+
+/* Name of the first output option whose bit is set in @field, else "". */
+static const char *output_field2str(enum perf_output_field field)
+{
+	int i, imax = ARRAY_SIZE(all_output_options);
+
+	/* '&', not '==': callers may pass an OR of flags such as TID|PID */
+	for (i = 0; i < imax; ++i) {
+		if (all_output_options[i].field & field)
+			return all_output_options[i].str;
+	}
+
+	return "";
+}
 
-#define PRINT_FIELD(x)  (output_fields[attr->type] & PERF_OUTPUT_##x)
+#define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
 
-static int perf_session__check_attr(struct perf_session *session,
-				    struct perf_event_attr *attr)
+static int perf_event_attr__check_stype(struct perf_event_attr *attr,
+				  u64 sample_type, const char *sample_msg,
+				  enum perf_output_field field)
 {
+	int type = attr->type;
+	const char *evname;
+
+	if (attr->sample_type & sample_type)
+		return 0;
+
+	if (output[type].user_set) {
+		evname = __event_name(attr->type, attr->config);
+		pr_err("Samples for '%s' event do not have %s attribute set. "
+		       "Cannot print '%s' field.\n",
+		       evname, sample_msg, output_field2str(field));
+		return -1;
+	}
+
+	/* user did not ask for it explicitly so remove from the default list */
+	output[type].fields &= ~field;
+	evname = __event_name(attr->type, attr->config);
+	pr_debug("Samples for '%s' event do not have %s attribute set. "
+		 "Skipping '%s' field.\n",
+		 evname, sample_msg, output_field2str(field));
+
+	return 0;
+}
+
+static int perf_evsel__check_attr(struct perf_evsel *evsel,
+				  struct perf_session *session)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
 	if (PRINT_FIELD(TRACE) &&
 		!perf_session__has_traces(session, "record -R"))
 		return -EINVAL;
 
 	if (PRINT_FIELD(SYM)) {
-		if (!(session->sample_type & PERF_SAMPLE_IP)) {
-			pr_err("Samples do not contain IP data.\n");
+		if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP",
+					   PERF_OUTPUT_SYM))
 			return -EINVAL;
-		}
+
 		if (!no_callchain &&
-		    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+		    !(attr->sample_type & PERF_SAMPLE_CALLCHAIN))
 			symbol_conf.use_callchain = false;
 	}
 
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
-		!(session->sample_type & PERF_SAMPLE_TID)) {
-		pr_err("Samples do not contain TID/PID data.\n");
+		perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",
+				       PERF_OUTPUT_TID|PERF_OUTPUT_PID))
 		return -EINVAL;
-	}
 
 	if (PRINT_FIELD(TIME) &&
-		!(session->sample_type & PERF_SAMPLE_TIME)) {
-		pr_err("Samples do not contain timestamps.\n");
+		perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME",
+				       PERF_OUTPUT_TIME))
 		return -EINVAL;
-	}
 
 	if (PRINT_FIELD(CPU) &&
-		!(session->sample_type & PERF_SAMPLE_CPU)) {
-		pr_err("Samples do not contain cpu.\n");
+		perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU",
+				       PERF_OUTPUT_CPU))
 		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * verify all user requested events exist and the samples
+ * have the expected data
+ */
+static int perf_session__check_output_opt(struct perf_session *session)
+{
+	int j;
+	struct perf_evsel *evsel;
+
+	for (j = 0; j < PERF_TYPE_MAX; ++j) {
+		evsel = perf_session__find_first_evtype(session, j);
+
+		/*
+		 * even if fields is set to 0 (i.e., show nothing) event must
+		 * exist if user explicitly includes it on the command line
+		 */
+		if (!evsel && output[j].user_set && !output[j].wildcard_set) {
+			pr_err("%s events do not exist. "
+			       "Remove corresponding -f option to proceed.\n",
+			       event_type(j));
+			return -1;
+		}
+
+		if (evsel && output[j].fields &&
+			perf_evsel__check_attr(evsel, session))
+			return -1;
 	}
 
 	return 0;
@@ -168,10 +280,7 @@ static void process_event(union perf_event *event __unused,
 {
 	struct perf_event_attr *attr = &evsel->attr;
 
-	if (output_fields[attr->type] == 0)
-		return;
-
-	if (perf_session__check_attr(session, attr) < 0)
+	if (output[attr->type].fields == 0)
 		return;
 
 	print_sample_start(sample, thread, attr);
@@ -451,6 +560,7 @@ static int parse_output_fields(const struct option *opt __used,
 {
 	char *tok;
 	int i, imax = sizeof(all_output_options) / sizeof(struct output_option);
+	int j;
 	int rc = 0;
 	char *str = strdup(arg);
 	int type = -1;
@@ -458,52 +568,99 @@ static int parse_output_fields(const struct option *opt __used,
 	if (!str)
 		return -ENOMEM;
 
-	tok = strtok(str, ":");
-	if (!tok) {
-		fprintf(stderr,
-			"Invalid field string - not prepended with type.");
-		return -EINVAL;
-	}
-
-	/* first word should state which event type user
-	 * is specifying the fields
+	/* first word can state for which event type the user is specifying
+	 * the fields. If no type exists, the specified fields apply to all
+	 * event types found in the file minus the invalid fields for a type.
 	 */
-	if (!strcmp(tok, "hw"))
-		type = PERF_TYPE_HARDWARE;
-	else if (!strcmp(tok, "sw"))
-		type = PERF_TYPE_SOFTWARE;
-	else if (!strcmp(tok, "trace"))
-		type = PERF_TYPE_TRACEPOINT;
-	else {
-		fprintf(stderr, "Invalid event type in field string.");
-		return -EINVAL;
+	tok = strchr(str, ':');
+	if (tok) {
+		*tok = '\0';
+		tok++;
+		if (!strcmp(str, "hw"))
+			type = PERF_TYPE_HARDWARE;
+		else if (!strcmp(str, "sw"))
+			type = PERF_TYPE_SOFTWARE;
+		else if (!strcmp(str, "trace"))
+			type = PERF_TYPE_TRACEPOINT;
+		else if (!strcmp(str, "raw"))
+			type = PERF_TYPE_RAW;
+		else {
+			fprintf(stderr, "Invalid event type in field string.\n");
+			rc = -EINVAL;	/* leave via 'out' so the strdup()ed str is freed */
+			goto out;
+		}
+		if (output[type].user_set)
+			pr_warning("Overriding previous field request for %s events.\n",
+				   event_type(type));
+
+		output[type].fields = 0;
+		output[type].user_set = true;
+		output[type].wildcard_set = false;
+
+	} else {
+		tok = str;
+		if (strlen(str) == 0) {
+			fprintf(stderr,
+				"Cannot set fields to 'none' for all event types.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+
+		if (output_set_by_user())
+			pr_warning("Overriding previous field request for all events.\n");
+
+		for (j = 0; j < PERF_TYPE_MAX; ++j) {
+			output[j].fields = 0;
+			output[j].user_set = true;
+			output[j].wildcard_set = true;
+		}
 	}
 
-	output_fields[type] = 0;
-	while (1) {
-		tok = strtok(NULL, ",");
-		if (!tok)
-			break;
+	tok = strtok(tok, ",");
+	while (tok) {
 		for (i = 0; i < imax; ++i) {
-			if (strcmp(tok, all_output_options[i].str) == 0) {
-				output_fields[type] |= all_output_options[i].field;
+			if (strcmp(tok, all_output_options[i].str) == 0)
 				break;
-			}
 		}
 		if (i == imax) {
-			fprintf(stderr, "Invalid field requested.");
+			fprintf(stderr, "Invalid field requested.\n");
 			rc = -EINVAL;
-			break;
+			goto out;
 		}
-	}
 
-	if (output_fields[type] == 0) {
-		pr_debug("No fields requested for %s type. "
-			 "Events will not be displayed\n", event_type(type));
+		if (type == -1) {
+			/* add user option to all event types for
+			 * which it is valid
+			 */
+			for (j = 0; j < PERF_TYPE_MAX; ++j) {
+				if (output[j].invalid_fields & all_output_options[i].field) {
+					pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
+						   all_output_options[i].str, event_type(j));
+				} else
+					output[j].fields |= all_output_options[i].field;
+			}
+		} else {
+			if (output[type].invalid_fields & all_output_options[i].field) {
+				fprintf(stderr, "\'%s\' not valid for %s events.\n",
+					 all_output_options[i].str, event_type(type));
+
+				rc = -EINVAL;
+				goto out;
+			}
+			output[type].fields |= all_output_options[i].field;
+		}
+
+		tok = strtok(NULL, ",");
 	}
 
-	output_set_by_user = true;
+	if (type >= 0) {
+		if (output[type].fields == 0) {
+			pr_debug("No fields requested for %s type. "
+				 "Events will not be displayed.\n", event_type(type));
+		}
+	}
 
+out:
 	free(str);
 	return rc;
 }
@@ -829,7 +986,7 @@ static const struct option options[] = {
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
 	OPT_CALLBACK('f', "fields", NULL, "str",
-		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace. Fields: comm,tid,pid,time,cpu,event,trace,sym",
+		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,sym",
 		     parse_output_fields),
 
 	OPT_END()
@@ -1020,7 +1177,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 		struct stat perf_stat;
 		int input;
 
-		if (output_set_by_user) {
+		if (output_set_by_user()) {
 			fprintf(stderr,
 				"custom fields not supported for generated scripts");
 			return -1;
@@ -1060,6 +1217,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 		pr_debug("perf script started with script %s\n\n", script_name);
 	}
 
+
+	err = perf_session__check_output_opt(session);
+	if (err < 0)
+		goto out;
+
 	err = __cmd_script(session);
 
 	perf_session__delete(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e2109f9b43eb..a9f06715e44d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -6,24 +6,28 @@
  *
  * Sample output:
 
-   $ perf stat ~/hackbench 10
-   Time: 0.104
+   $ perf stat ./hackbench 10
 
-    Performance counter stats for '/home/mingo/hackbench':
+  Time: 0.118
 
-       1255.538611  task clock ticks     #      10.143 CPU utilization factor
-             54011  context switches     #       0.043 M/sec
-               385  CPU migrations       #       0.000 M/sec
-             17755  pagefaults           #       0.014 M/sec
-        3808323185  CPU cycles           #    3033.219 M/sec
-        1575111190  instructions         #    1254.530 M/sec
-          17367895  cache references     #      13.833 M/sec
-           7674421  cache misses         #       6.112 M/sec
+  Performance counter stats for './hackbench 10':
 
-    Wall-clock time elapsed:   123.786620 msecs
+       1708.761321 task-clock                #   11.037 CPUs utilized
+            41,190 context-switches          #    0.024 M/sec
+             6,735 CPU-migrations            #    0.004 M/sec
+            17,318 page-faults               #    0.010 M/sec
+     5,205,202,243 cycles                    #    3.046 GHz
+     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
+     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
+     2,603,501,247 instructions              #    0.50  insns per cycle
+                                             #    1.48  stalled cycles per insn
+       484,357,498 branches                  #  283.455 M/sec
+         6,388,934 branch-misses             #    1.32% of all branches
+
+        0.154822978  seconds time elapsed
 
  *
- * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  *
  * Improvements and fixes by:
  *
@@ -46,6 +50,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/color.h"
 #include "util/header.h"
 #include "util/cpumap.h"
 #include "util/thread.h"
@@ -65,14 +70,107 @@ static struct perf_event_attr default_attrs[] = {
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES		},
 
 };
 
+/*
+ * Detailed stats (-d), covering the L1 and last level data caches:
+ */
+static struct perf_event_attr detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_LL			<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_LL			<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+};
+
+/*
+ * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
+ */
+static struct perf_event_attr very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+};
+
+/*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+static struct perf_event_attr very_very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+};
+
+
+
 struct perf_evlist		*evsel_list;
 
 static bool			system_wide			=  false;
@@ -86,6 +184,8 @@ static pid_t			target_pid			= -1;
 static pid_t			target_tid			= -1;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
+static int			detailed_run			=  0;
+static bool			sync_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*cpu_list;
@@ -156,7 +256,15 @@ static double stddev_stats(struct stats *stats)
 
 struct stats			runtime_nsecs_stats[MAX_NR_CPUS];
 struct stats			runtime_cycles_stats[MAX_NR_CPUS];
+struct stats			runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
+struct stats			runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
 struct stats			runtime_branches_stats[MAX_NR_CPUS];
+struct stats			runtime_cacherefs_stats[MAX_NR_CPUS];
+struct stats			runtime_l1_dcache_stats[MAX_NR_CPUS];
+struct stats			runtime_l1_icache_stats[MAX_NR_CPUS];
+struct stats			runtime_ll_cache_stats[MAX_NR_CPUS];
+struct stats			runtime_itlb_cache_stats[MAX_NR_CPUS];
+struct stats			runtime_dtlb_cache_stats[MAX_NR_CPUS];
 struct stats			walltime_nsecs_stats;
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -167,16 +275,17 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 				    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
+	attr->inherit = !no_inherit;
+
 	if (system_wide)
-		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
+		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
 
-	attr->inherit = !no_inherit;
 	if (target_pid == -1 && target_tid == -1) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
 
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
+	return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
 }
 
 /*
@@ -192,6 +301,37 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 }
 
 /*
+ * Feed count[0] into the shadow stat matching the counter's event; the
+ * printout code uses these to derive ratios (miss rates, IPC, GHz, ...).
+ * NOTE(review): only slot [0] is updated, read_counter() used [cpu] - confirm.
+ */
+static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
+{
+	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+		update_stats(&runtime_nsecs_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+		update_stats(&runtime_cycles_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+		update_stats(&runtime_branches_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+		update_stats(&runtime_cacherefs_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+		update_stats(&runtime_l1_dcache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_stats(&runtime_l1_icache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_stats(&runtime_ll_cache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_stats(&runtime_itlb_cache_stats[0], count[0]);
+}
+
+/*
  * Read out the results of a single counter:
  * aggregate counts across CPUs in system-wide mode
  */
@@ -216,12 +356,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-		update_stats(&runtime_nsecs_stats[0], count[0]);
-	if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[0], count[0]);
-	if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[0], count[0]);
+	update_shadow_stats(counter, count);
 
 	return 0;
 }
@@ -241,12 +376,7 @@ static int read_counter(struct perf_evsel *counter)
 
 		count = counter->counts->cpu[cpu].values;
 
-		if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-			update_stats(&runtime_nsecs_stats[cpu], count[0]);
-		if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-			update_stats(&runtime_cycles_stats[cpu], count[0]);
-		if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-			update_stats(&runtime_branches_stats[cpu], count[0]);
+		update_shadow_stats(counter, count);
 	}
 
 	return 0;
@@ -314,13 +444,18 @@ static int run_perf_stat(int argc __used, const char **argv)
 
 	list_for_each_entry(counter, &evsel_list->entries, node) {
 		if (create_perf_stat_counter(counter) < 0) {
-			if (errno == -EPERM || errno == -EACCES) {
+			if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) {
+				if (verbose)
+					ui__warning("%s event is not supported by the kernel.\n",
+						    event_name(counter));
+				continue;
+			}
+
+			if (errno == EPERM || errno == EACCES) {
 				error("You may not have permission to collect %sstats.\n"
 				      "\t Consider tweaking"
 				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
 				      system_wide ? "system-wide " : "");
-			} else if (errno == ENOENT) {
-				error("%s event is not supported. ", event_name(counter));
 			} else {
 				error("open_counter returned with %d (%s). "
 				      "/bin/dmesg may provide additional information.\n",
@@ -371,6 +506,16 @@ static int run_perf_stat(int argc __used, const char **argv)
 	return WEXITSTATUS(status);
 }
 
+/*
+ * Print total as a percentage of avg on stderr; a zero avg is
+ * reported as 0.00% rather than dividing by zero.
+ */
+static void print_noise_pct(double total, double avg)
+{
+	double pct = avg ? 100.0*total/avg : 0.0;
+	fprintf(stderr, "  ( +-%6.2f%% )", pct);
+}
+
 static void print_noise(struct perf_evsel *evsel, double avg)
 {
 	struct perf_stat *ps;
@@ -379,15 +524,14 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 		return;
 
 	ps = evsel->priv;
-	fprintf(stderr, "   ( +- %7.3f%% )",
-			100 * stddev_stats(&ps->res_stats[0]) / avg);
+	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
 static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
 	double msecs = avg / 1e6;
 	char cpustr[16] = { '\0', };
-	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
+	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
 
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
@@ -403,8 +547,191 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 		return;
 
 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
-		fprintf(stderr, " # %10.3f CPUs ",
-				avg / avg_stats(&walltime_nsecs_stats));
+		fprintf(stderr, " # %8.3f CPUs utilized          ", avg / avg_stats(&walltime_nsecs_stats));
+}
+
+static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_cycles_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 50.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 30.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " frontend cycles idle   ");
+}
+
+static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_cycles_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 75.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 50.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 20.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " backend  cycles idle   ");
+}
+
+static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_branches_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all branches        ");
+}
+
+static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_l1_dcache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all L1-dcache hits  ");
+}
+
+static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_l1_icache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all L1-icache hits  ");
+}
+
+static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_itlb_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_ll_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all LL-cache hits   ");
 }
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -416,9 +743,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	if (csv_output)
 		fmt = "%s%.0f%s%s";
 	else if (big_num)
-		fmt = "%s%'18.0f%s%-24s";
+		fmt = "%s%'18.0f%s%-25s";
 	else
-		fmt = "%s%18.0f%s%-24s";
+		fmt = "%s%18.0f%s%-25s";
 
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
@@ -441,23 +768,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 		if (total)
 			ratio = avg / total;
 
-		fprintf(stderr, " # %10.3f IPC  ", ratio);
+		fprintf(stderr, " #   %5.2f  insns per cycle        ", ratio);
+
+		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
+
+		if (total && avg) {
+			ratio = total / avg;
+			fprintf(stderr, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
+		}
+
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 			runtime_branches_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_branches_stats[cpu]);
+		print_branch_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_dcache_stats[cpu].n != 0) {
+		print_l1_dcache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_icache_stats[cpu].n != 0) {
+		print_l1_icache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_dtlb_cache_stats[cpu].n != 0) {
+		print_dtlb_cache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_itlb_cache_stats[cpu].n != 0) {
+		print_itlb_cache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_ll_cache_stats[cpu].n != 0) {
+		print_ll_cache_misses(cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+			runtime_cacherefs_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_cacherefs_stats[cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(stderr, " # %10.3f %%    ", ratio);
+		fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);
 
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+		print_stalled_cycles_backend(cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+		if (total)
+			ratio = 1.0 * avg / total;
+
+		fprintf(stderr, " # %8.3f GHz                    ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
 		if (total)
 			ratio = 1000.0 * avg / total;
 
-		fprintf(stderr, " # %10.3f M/sec", ratio);
+		fprintf(stderr, " # %8.3f M/sec                  ", ratio);
+	} else {
+		fprintf(stderr, "                                   ");
 	}
 }
 
@@ -504,8 +891,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 		avg_enabled = avg_stats(&ps->res_stats[1]);
 		avg_running = avg_stats(&ps->res_stats[2]);
 
-		fprintf(stderr, "  (scaled from %.2f%%)",
-				100 * avg_running / avg_enabled);
+		fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled);
 	}
 	fprintf(stderr, "\n");
 }
@@ -547,10 +933,8 @@ static void print_counter(struct perf_evsel *counter)
 		if (!csv_output) {
 			print_noise(counter, 1.0);
 
-			if (run != ena) {
-				fprintf(stderr, "  (scaled from %.2f%%)",
-					100.0 * run / ena);
-			}
+			if (run != ena)
+				fprintf(stderr, "  (%.2f%%)", 100.0 * run / ena);
 		}
 		fputc('\n', stderr);
 	}
@@ -590,13 +974,14 @@ static void print_stat(int argc, const char **argv)
 	}
 
 	if (!csv_output) {
-		fprintf(stderr, "\n");
-		fprintf(stderr, " %18.9f  seconds time elapsed",
+		if (!null_run)
+			fprintf(stderr, "\n");
+		fprintf(stderr, " %17.9f seconds time elapsed",
 				avg_stats(&walltime_nsecs_stats)/1e9);
 		if (run_count > 1) {
-			fprintf(stderr, "   ( +- %7.3f%% )",
-				100*stddev_stats(&walltime_nsecs_stats) /
-				avg_stats(&walltime_nsecs_stats));
+			fprintf(stderr, "                                        ");
+			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
+					avg_stats(&walltime_nsecs_stats));
 		}
 		fprintf(stderr, "\n\n");
 	}
@@ -658,6 +1043,10 @@ static const struct option options[] = {
 		    "repeat command and print average + stddev (max: 100)"),
 	OPT_BOOLEAN('n', "null", &null_run,
 		    "null run - dont start any counters"),
+	OPT_INCR('d', "detailed", &detailed_run,
+		    "detailed run - start a lot of events"),
+	OPT_BOOLEAN('S', "sync", &sync_run,
+		    "call sync() before starting a run"),
 	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
 			   "print large numbers with thousands\' separators",
 			   stat__set_big_num),
@@ -673,6 +1062,70 @@ static const struct option options[] = {
 	OPT_END()
 };
 
+/*
+ * Add default attributes, if there were no attributes specified or
+ * if -d/--detailed, -d -d or -d -d -d is used. Each appended event gets
+ * the next index (attr_nr). Returns 0, or -1 if perf_evsel__new() fails.
+ */
+static int add_default_attributes(void)
+{
+	struct perf_evsel *pos;
+	size_t attr_nr = 0;
+	size_t c;
+
+	/* Set attrs if no event is selected and !null_run: */
+	if (null_run)
+		return 0;
+
+	if (!evsel_list->nr_entries) {
+		for (c = 0; c < ARRAY_SIZE(default_attrs); c++) {
+			pos = perf_evsel__new(default_attrs + c, c + attr_nr);
+			if (pos == NULL)
+				return -1;
+			perf_evlist__add(evsel_list, pos);
+		}
+		attr_nr += c;
+	}
+
+	/* Detailed events get appended to the event list: */
+	if (detailed_run < 1)
+		return 0;
+
+	/* Append detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) {
+		pos = perf_evsel__new(detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+	attr_nr += c;
+
+	if (detailed_run < 2)
+		return 0;
+
+	/* Append very detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) {
+		pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+	attr_nr += c;
+
+	if (detailed_run < 3)
+		return 0;
+
+	/* Append very, very detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) {
+		pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+
+	return 0;
+}
+
 int cmd_stat(int argc, const char **argv, const char *prefix __used)
 {
 	struct perf_evsel *pos;
@@ -718,17 +1171,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 		usage_with_options(stat_usage, options);
 	}
 
-	/* Set attrs and nr_counters if no event is selected and !null_run */
-	if (!null_run && !evsel_list->nr_entries) {
-		size_t c;
-
-		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-			pos = perf_evsel__new(&default_attrs[c], c);
-			if (pos == NULL)
-				goto out;
-			perf_evlist__add(evsel_list, pos);
-		}
-	}
+	if (add_default_attributes())
+		goto out;
 
 	if (target_pid != -1)
 		target_tid = target_pid;
@@ -772,6 +1216,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 	for (run_idx = 0; run_idx < run_count; run_idx++) {
 		if (run_count != 1 && verbose)
 			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
+
+		if (sync_run)
+			sync();
+
 		status = run_perf_stat(argc, argv);
 	}
 
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1b2106c58f66..2f9a337b182f 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -290,7 +290,7 @@ static int test__open_syscall_event(void)
 		goto out_thread_map_delete;
 	}
 
-	if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
+	if (perf_evsel__open_per_thread(evsel, threads, false) < 0) {
 		pr_debug("failed to open counter: %s, "
 			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
 			 strerror(errno));
@@ -303,7 +303,7 @@ static int test__open_syscall_event(void)
 	}
 
 	if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
-		pr_debug("perf_evsel__open_read_on_cpu\n");
+		pr_debug("perf_evsel__read_on_cpu\n");
 		goto out_close_fd;
 	}
 
@@ -365,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
 		goto out_thread_map_delete;
 	}
 
-	if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
+	if (perf_evsel__open(evsel, cpus, threads, false) < 0) {
 		pr_debug("failed to open counter: %s, "
 			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
 			 strerror(errno));
@@ -418,7 +418,7 @@ static int test__open_syscall_event_on_all_cpus(void)
 			continue;
 
 		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
-			pr_debug("perf_evsel__open_read_on_cpu\n");
+			pr_debug("perf_evsel__read_on_cpu\n");
 			err = -1;
 			break;
 		}
@@ -529,7 +529,7 @@ static int test__basic_mmap(void)
 
 		perf_evlist__add(evlist, evsels[i]);
 
-		if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+		if (perf_evsel__open(evsels[i], cpus, threads, false) < 0) {
 			pr_debug("failed to open counter: %s, "
 				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
 				 strerror(errno));
@@ -549,7 +549,7 @@ static int test__basic_mmap(void)
 			++foo;
 		}
 
-	while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
+	while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) {
 		struct perf_sample sample;
 
 		if (event->header.type != PERF_RECORD_SAMPLE) {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fc1273e976c5..ebfc7cf5f63b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -801,12 +801,12 @@ static void perf_event__process_sample(const union perf_event *event,
 	}
 }
 
-static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
+static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
 {
 	struct perf_sample sample;
 	union perf_event *event;
 
-	while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
+	while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
 		perf_session__parse_sample(self, event, &sample);
 
 		if (event->header.type == PERF_RECORD_SAMPLE)
@@ -820,8 +820,8 @@ static void perf_session__mmap_read(struct perf_session *self)
 {
 	int i;
 
-	for (i = 0; i < top.evlist->cpus->nr; i++)
-		perf_session__mmap_read_cpu(self, i);
+	for (i = 0; i < top.evlist->nr_mmaps; i++)
+		perf_session__mmap_read_idx(self, i);
 }
 
 static void start_counters(struct perf_evlist *evlist)
@@ -845,9 +845,10 @@ static void start_counters(struct perf_evlist *evlist)
 		}
 
 		attr->mmap = 1;
+		attr->inherit = inherit;
 try_again:
 		if (perf_evsel__open(counter, top.evlist->cpus,
-				     top.evlist->threads, group, inherit) < 0) {
+				     top.evlist->threads, group) < 0) {
 			int err = errno;
 
 			if (err == EPERM || err == EACCES) {
diff --git a/tools/perf/feature-tests.mak b/tools/perf/config/feature-tests.mak
index b041ca67a2cb..6170fd2531b5 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -79,9 +79,15 @@ endef
 endif
 
 ifndef NO_LIBPYTHON
+define SOURCE_PYTHON_VERSION
+#include <Python.h>
+#if PY_VERSION_HEX >= 0x03000000
+	#error
+#endif
+int main(void){}
+endef
 define SOURCE_PYTHON_EMBED
 #include <Python.h>
-
 int main(void)
 {
 	Py_Initialize();
@@ -120,11 +126,3 @@ int main(void)
 	return 0;
 }
 endef
-
-# try-cc
-# Usage: option = $(call try-cc, source-to-build, cc-options)
-try-cc = $(shell sh -c						  \
-	'TMP="$(OUTPUT)$(TMPOUT).$$$$";				  \
-	 echo "$(1)" |						  \
-	 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
-	 rm -f "$$TMP"')
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
new file mode 100644
index 000000000000..8046182a19eb
--- /dev/null
+++ b/tools/perf/config/utilities.mak
@@ -0,0 +1,188 @@
+# This allows us to work with the newline character:
+define newline
+
+
+endef
+newline := $(newline)
+
+# nl-escape
+#
+# Usage: escape = $(call nl-escape[,escape])
+#
+# This is used as the common way to specify
+# what should replace a newline when escaping
+# newlines; the default is a bizarre string.
+#
+nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n)
+
+# escape-nl
+#
+# Usage: escaped-text = $(call escape-nl,text[,escape])
+#
+# GNU make's $(shell ...) function converts to a
+# single space each newline character in the output
+# produced during the expansion; this may not be
+# desirable.
+#
+# The only solution is to change each newline into
+# something that won't be converted, so that the
+# information can be recovered later with
+# $(call unescape-nl...)
+#
+escape-nl = $(subst $(newline),$(call nl-escape,$(2)),$(1))
+
+# unescape-nl
+#
+# Usage: text = $(call unescape-nl,escaped-text[,escape])
+#
+# See escape-nl.
+#
+unescape-nl = $(subst $(call nl-escape,$(2)),$(newline),$(1))
+
+# shell-escape-nl
+#
+# Usage: $(shell some-command | $(call shell-escape-nl[,escape]))
+#
+# Use this to escape newlines from within a shell call;
+# the default escape is a bizarre string.
+#
+# NOTE: The escape is used directly as a string constant in an
+#       `awk' program that is delimited by shell single-quotes,
+#       so be wary of the characters that are chosen; the text
+#       itself is printed through "%s", so a `%' in it is safe.
+#
+define shell-escape-nl
+awk 'NR==1 {t=$$0} NR>1 {t=t "$(nl-escape)" $$0} END {printf "%s", t}'
+endef
+
+# shell-unescape-nl
+#
+# Usage: $(shell some-command | $(call shell-unescape-nl[,escape]))
+#
+# Use this to unescape newlines from within a shell call;
+# the default escape is a bizarre string.
+#
+# NOTE: The escape is used directly as an extended regular
+#       expression constant in an `awk' program delimited by
+#       shell single-quotes, so be wary of the characters
+#       chosen; output goes through "%s" so `%' stays literal.
+#
+# (The bash shell has a bug where `{gsub(...),...}' is
+#  misinterpreted as a brace expansion; this can be
+#  overcome by putting a space between `{' and `gsub').
+#
+define shell-unescape-nl
+awk 'NR==1 {t=$$0} NR>1 {t=t "\n" $$0} END { gsub(/$(nl-escape)/,"\n",t); printf "%s", t }'
+endef
+
+# escape-for-shell-sq
+#
+# Usage: embeddable-text = $(call escape-for-shell-sq,text)
+#
+# This function produces text that is suitable for
+# embedding in a shell string that is delimited by
+# single-quotes.
+#
+escape-for-shell-sq =  $(subst ','\'',$(1))
+
+# shell-sq
+#
+# Usage: single-quoted-and-escaped-text = $(call shell-sq,text)
+#
+shell-sq = '$(escape-for-shell-sq)'
+
+# shell-wordify
+#
+# Usage: wordified-text = $(call shell-wordify,text)
+#
+# For instance:
+#
+#  |define text
+#  |hello
+#  |world
+#  |endef
+#  |
+#  |target:
+#  |	echo $(call shell-wordify,$(text))
+#
+# At least GNU make gets confused by expanding a newline
+# within the context of a command line of a makefile rule
+# (this is in contrast to a `$(shell ...)' function call,
+# which can handle it just fine).
+#
+# This function avoids the problem by producing a string
+# that works as a shell word, regardless of whether or
+# not it contains a newline.
+#
+# If the text to be wordified contains a newline, then
+# an intricate shell command substitution is constructed
+# to render the text as a single line; when the shell
+# processes the resulting escaped text, it transforms
+# it into the original unescaped text.
+#
+# If the text does not contain a newline, then this function
+# produces the same results as the `$(shell-sq)' function.
+#
+shell-wordify = $(if $(findstring $(newline),$(1)),$(_sw-esc-nl),$(shell-sq))
+define _sw-esc-nl
+"$$(echo $(call escape-nl,$(shell-sq),$(2)) | $(call shell-unescape-nl,$(2)))"
+endef
+
+# is-absolute
+#
+# Usage: bool-value = $(call is-absolute,path)
+#
+is-absolute = $(shell echo $(shell-sq) | grep ^/ -q && echo y)
+
+# lookup
+#
+# Usage: absolute-executable-path-or-empty = $(call lookup,path)
+#
+# (It's necessary to use `sh -c' because GNU make messes up by
+#  trying too hard and getting things wrong).
+#
+lookup = $(call unescape-nl,$(shell sh -c $(_l-sh)))
+_l-sh = $(call shell-sq,command -v $(shell-sq) | $(call shell-escape-nl,))
+
+# is-executable
+#
+# Usage: bool-value = $(call is-executable,path)
+#
+# (It's necessary to use `sh -c' because GNU make messes up by
+#  trying too hard and getting things wrong).
+#
+is-executable = $(call _is-executable-helper,$(shell-sq))
+_is-executable-helper = $(shell sh -c $(_is-executable-sh))
+_is-executable-sh = $(call shell-sq,test -f $(1) -a -x $(1) && echo y)
+
+# get-executable
+#
+# Usage: absolute-executable-path-or-empty = $(call get-executable,path)
+#
+# The goal is to get an absolute path for an executable;
+# the `command -v' is defined by POSIX, but it's not
+# necessarily very portable, so it's only used if
+# relative path resolution is requested, as determined
+# by the absence of a leading `/'.
+#
+get-executable = $(if $(1),$(if $(is-absolute),$(_ge-abspath),$(lookup)))
+_ge-abspath = $(if $(is-executable),$(1))
+
+# get-executable-or-default
+#
+# Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
+#
+define get-executable-or-default
+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
+endef
+_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2)))
+_gea_warn = $(warning The path '$(1)' is not executable.)
+_gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# try-cc
+# Usage: option = $(call try-cc, source-to-build, cc-options)
+try-cc = $(shell sh -c						  \
+	'TMP="$(OUTPUT)$(TMPOUT).$$$$";				  \
+	 echo "$(1)" |						  \
+	 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
+	 rm -f "$$TMP"')
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 9fea75535221..96bee5c46008 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -13,7 +13,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
 {
 	FILE *fp;
 	char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
-	char *token, *saved_ptr;
+	char *token, *saved_ptr = NULL;
 	int found = 0;
 
 	fp = fopen("/proc/mounts", "r");
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d852cefa20de..23eb22b05d27 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -12,6 +12,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "util.h"
+#include "debug.h"
 
 #include <sys/mman.h>
 
@@ -165,11 +166,11 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 	return NULL;
 }
 
-union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	/* XXX Move this to perf.c, making it generally available */
 	unsigned int page_size = sysconf(_SC_PAGE_SIZE);
-	struct perf_mmap *md = &evlist->mmap[cpu];
+	struct perf_mmap *md = &evlist->mmap[idx];
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
@@ -234,36 +235,126 @@ union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
 
 void perf_evlist__munmap(struct perf_evlist *evlist)
 {
-	int cpu;
+	int i;
 
-	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
-		if (evlist->mmap[cpu].base != NULL) {
-			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
-			evlist->mmap[cpu].base = NULL;
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		if (evlist->mmap[i].base != NULL) {
+			munmap(evlist->mmap[i].base, evlist->mmap_len);
+			evlist->mmap[i].base = NULL;
 		}
 	}
+
+	free(evlist->mmap);
+	evlist->mmap = NULL;
 }
 
 int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 {
-	evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap));
+	evlist->nr_mmaps = evlist->cpus->nr;
+	if (evlist->cpus->map[0] == -1)
+		evlist->nr_mmaps = evlist->threads->nr;
+	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
 	return evlist->mmap != NULL ? 0 : -ENOMEM;
 }
 
-static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
-			       int mask, int fd)
+static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *evsel,
+			       int idx, int prot, int mask, int fd)
 {
-	evlist->mmap[cpu].prev = 0;
-	evlist->mmap[cpu].mask = mask;
-	evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
+	evlist->mmap[idx].prev = 0;
+	evlist->mmap[idx].mask = mask;
+	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
 				      MAP_SHARED, fd, 0);
-	if (evlist->mmap[cpu].base == MAP_FAILED)
+	if (evlist->mmap[idx].base == MAP_FAILED) {
+		/* idx may be a thread index here, so test map[0] like the callers do */
+		if (evlist->cpus->map[0] == -1 && evsel->attr.inherit)
+			ui__warning("Inherit is not allowed on per-task events using mmap.\n");
 		return -1;
+	}
 
 	perf_evlist__add_pollfd(evlist, fd);
 	return 0;
 }
 
+static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask)
+{
+	struct perf_evsel *evsel;
+	int cpu, thread;
+
+	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+		int output = -1;
+
+		for (thread = 0; thread < evlist->threads->nr; thread++) {
+			list_for_each_entry(evsel, &evlist->entries, node) {
+				int fd = FD(evsel, cpu, thread);
+
+				if (output == -1) {
+					output = fd;
+					if (__perf_evlist__mmap(evlist, evsel, cpu,
+								prot, mask, output) < 0)
+						goto out_unmap;
+				} else {
+					if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
+						goto out_unmap;
+				}
+
+				if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+				    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
+					goto out_unmap;
+			}
+		}
+	}
+
+	return 0;
+
+out_unmap:
+	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+		if (evlist->mmap[cpu].base != NULL) {
+			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+			evlist->mmap[cpu].base = NULL;
+		}
+	}
+	return -1;
+}
+
+static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask)
+{
+	struct perf_evsel *evsel;
+	int thread;
+
+	for (thread = 0; thread < evlist->threads->nr; thread++) {
+		int output = -1;
+
+		list_for_each_entry(evsel, &evlist->entries, node) {
+			int fd = FD(evsel, 0, thread);
+
+			if (output == -1) {
+				output = fd;
+				if (__perf_evlist__mmap(evlist, evsel, thread,
+							prot, mask, output) < 0)
+					goto out_unmap;
+			} else {
+				if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
+					goto out_unmap;
+			}
+
+			if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+			    perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0)
+				goto out_unmap;
+		}
+	}
+
+	return 0;
+
+out_unmap:
+	for (thread = 0; thread < evlist->threads->nr; thread++) {
+		if (evlist->mmap[thread].base != NULL) {
+			munmap(evlist->mmap[thread].base, evlist->mmap_len);
+			evlist->mmap[thread].base = NULL;
+		}
+	}
+	return -1;
+}
+
 /** perf_evlist__mmap - Create per cpu maps to receive events
  *
  * @evlist - list of events
@@ -282,11 +373,11 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
 int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
 {
 	unsigned int page_size = sysconf(_SC_PAGE_SIZE);
-	int mask = pages * page_size - 1, cpu;
-	struct perf_evsel *first_evsel, *evsel;
+	int mask = pages * page_size - 1;
+	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
 	const struct thread_map *threads = evlist->threads;
-	int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
+	int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
 
 	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
 		return -ENOMEM;
@@ -296,42 +387,18 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
 
 	evlist->overwrite = overwrite;
 	evlist->mmap_len = (pages + 1) * page_size;
-	first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
 		    perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
 			return -ENOMEM;
-
-		for (cpu = 0; cpu < cpus->nr; cpu++) {
-			for (thread = 0; thread < threads->nr; thread++) {
-				int fd = FD(evsel, cpu, thread);
-
-				if (evsel->idx || thread) {
-					if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
-						  FD(first_evsel, cpu, 0)) != 0)
-						goto out_unmap;
-				} else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
-					goto out_unmap;
-
-				if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
-				    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
-					goto out_unmap;
-			}
-		}
 	}
 
-	return 0;
+	if (evlist->cpus->map[0] == -1)
+		return perf_evlist__mmap_per_thread(evlist, prot, mask);
 
-out_unmap:
-	for (cpu = 0; cpu < cpus->nr; cpu++) {
-		if (evlist->mmap[cpu].base != NULL) {
-			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
-			evlist->mmap[cpu].base = NULL;
-		}
-	}
-	return -1;
+	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
@@ -342,7 +409,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
 	if (evlist->threads == NULL)
 		return -1;
 
-	if (target_tid != -1)
+	if (cpu_list == NULL && target_tid != -1)
 		evlist->cpus = cpu_map__dummy_new();
 	else
 		evlist->cpus = cpu_map__new(cpu_list);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8b1cb7a4c5f1..7109d7add14e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -17,6 +17,7 @@ struct perf_evlist {
 	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
 	int		 nr_entries;
 	int		 nr_fds;
+	int		 nr_mmaps;
 	int		 mmap_len;
 	bool		 overwrite;
 	union perf_event event_copy;
@@ -46,7 +47,7 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
 
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
 
-union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 
 int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
 int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 662596afd7f1..d6fd59beb860 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -175,7 +175,7 @@ int __perf_evsel__read(struct perf_evsel *evsel,
 }
 
 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-			      struct thread_map *threads, bool group, bool inherit)
+			      struct thread_map *threads, bool group)
 {
 	int cpu, thread;
 	unsigned long flags = 0;
@@ -192,19 +192,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
 		int group_fd = -1;
-		/*
-		 * Don't allow mmap() of inherited per-task counters. This
-		 * would create a performance issue due to all children writing
-		 * to the same buffer.
-		 *
-		 * FIXME:
-		 * Proper fix is not to pass 'inherit' to perf_evsel__open*,
-		 * but a 'flags' parameter, with 'group' folded there as well,
-		 * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if
-		 * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is
-		 * set. Lets go for the minimal fix first tho.
-		 */
-		evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit;
 
 		for (thread = 0; thread < threads->nr; thread++) {
 
@@ -253,7 +240,7 @@ static struct {
 };
 
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-		     struct thread_map *threads, bool group, bool inherit)
+		     struct thread_map *threads, bool group)
 {
 	if (cpus == NULL) {
 		/* Work around old compiler warnings about strict aliasing */
@@ -263,19 +250,19 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 	if (threads == NULL)
 		threads = &empty_thread_map.map;
 
-	return __perf_evsel__open(evsel, cpus, threads, group, inherit);
+	return __perf_evsel__open(evsel, cpus, threads, group);
 }
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-			     struct cpu_map *cpus, bool group, bool inherit)
+			     struct cpu_map *cpus, bool group)
 {
-	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
+	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
 }
 
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-				struct thread_map *threads, bool group, bool inherit)
+				struct thread_map *threads, bool group)
 {
-	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
+	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
 }
 
 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6710ab538342..f79bb2c09a6c 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -81,11 +81,11 @@ void perf_evsel__free_id(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
-			     struct cpu_map *cpus, bool group, bool inherit);
+			     struct cpu_map *cpus, bool group);
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
-				struct thread_map *threads, bool group, bool inherit);
+				struct thread_map *threads, bool group);
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
-		     struct thread_map *threads, bool group, bool inherit);
+		     struct thread_map *threads, bool group);
 
 #define perf_evsel__match(evsel, t, c)		\
 	(evsel->attr.type == PERF_TYPE_##t &&	\
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
new file mode 100644
index 000000000000..6789d788d494
--- /dev/null
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_ASM_ALTERNATIVE_ASM_H
+#define _PERF_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+
+#endif
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index 356c7e467b83..99358d61e9a5 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -23,5 +23,5 @@ static inline void list_del_range(struct list_head *begin,
  * @head: the head for your list.
  */
 #define list_for_each_from(pos, head) \
-	for (; prefetch(pos->next), pos != (head); pos = pos->next)
+	for (; pos != (head); pos = pos->next)
 #endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 952b4ae3d954..41982c373faf 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -31,34 +31,36 @@ char debugfs_path[MAXPATHLEN];
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
 
 static struct event_symbol event_symbols[] = {
-  { CHW(CPU_CYCLES),		"cpu-cycles",		"cycles"	},
-  { CHW(INSTRUCTIONS),		"instructions",		""		},
-  { CHW(CACHE_REFERENCES),	"cache-references",	""		},
-  { CHW(CACHE_MISSES),		"cache-misses",		""		},
-  { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
-  { CHW(BRANCH_MISSES),		"branch-misses",	""		},
-  { CHW(BUS_CYCLES),		"bus-cycles",		""		},
-
-  { CSW(CPU_CLOCK),		"cpu-clock",		""		},
-  { CSW(TASK_CLOCK),		"task-clock",		""		},
-  { CSW(PAGE_FAULTS),		"page-faults",		"faults"	},
-  { CSW(PAGE_FAULTS_MIN),	"minor-faults",		""		},
-  { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
-  { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
-  { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
-  { CSW(ALIGNMENT_FAULTS),	"alignment-faults",	""		},
-  { CSW(EMULATION_FAULTS),	"emulation-faults",	""		},
+  { CHW(CPU_CYCLES),			"cpu-cycles",			"cycles"		},
+  { CHW(STALLED_CYCLES_FRONTEND),	"stalled-cycles-frontend",	"idle-cycles-frontend"	},
+  { CHW(STALLED_CYCLES_BACKEND),	"stalled-cycles-backend",	"idle-cycles-backend"	},
+  { CHW(INSTRUCTIONS),			"instructions",			""			},
+  { CHW(CACHE_REFERENCES),		"cache-references",		""			},
+  { CHW(CACHE_MISSES),			"cache-misses",			""			},
+  { CHW(BRANCH_INSTRUCTIONS),		"branch-instructions",		"branches"		},
+  { CHW(BRANCH_MISSES),			"branch-misses",		""			},
+  { CHW(BUS_CYCLES),			"bus-cycles",			""			},
+
+  { CSW(CPU_CLOCK),			"cpu-clock",			""			},
+  { CSW(TASK_CLOCK),			"task-clock",			""			},
+  { CSW(PAGE_FAULTS),			"page-faults",			"faults"		},
+  { CSW(PAGE_FAULTS_MIN),		"minor-faults",			""			},
+  { CSW(PAGE_FAULTS_MAJ),		"major-faults",			""			},
+  { CSW(CONTEXT_SWITCHES),		"context-switches",		"cs"			},
+  { CSW(CPU_MIGRATIONS),		"cpu-migrations",		"migrations"		},
+  { CSW(ALIGNMENT_FAULTS),		"alignment-faults",		""			},
+  { CSW(EMULATION_FAULTS),		"emulation-faults",		""			},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
 	((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
 
-#define PERF_EVENT_RAW(config)	__PERF_EVENT_FIELD(config, RAW)
+#define PERF_EVENT_RAW(config)		__PERF_EVENT_FIELD(config, RAW)
 #define PERF_EVENT_CONFIG(config)	__PERF_EVENT_FIELD(config, CONFIG)
-#define PERF_EVENT_TYPE(config)	__PERF_EVENT_FIELD(config, TYPE)
+#define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)
 #define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
 
-static const char *hw_event_names[] = {
+static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
 	"cycles",
 	"instructions",
 	"cache-references",
@@ -66,11 +68,13 @@ static const char *hw_event_names[] = {
 	"branches",
 	"branch-misses",
 	"bus-cycles",
+	"stalled-cycles-frontend",
+	"stalled-cycles-backend",
 };
 
-static const char *sw_event_names[] = {
-	"cpu-clock-msecs",
-	"task-clock-msecs",
+static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
+	"cpu-clock",
+	"task-clock",
 	"page-faults",
 	"context-switches",
 	"CPU-migrations",
@@ -307,7 +311,7 @@ const char *__event_name(int type, u64 config)
 
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		if (config < PERF_COUNT_HW_MAX)
+		if (config < PERF_COUNT_HW_MAX && hw_event_names[config])
 			return hw_event_names[config];
 		return "unknown-hardware";
 
@@ -333,7 +337,7 @@ const char *__event_name(int type, u64 config)
 	}
 
 	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_COUNT_SW_MAX)
+		if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
 			return sw_event_names[config];
 		return "unknown-software";
 
@@ -648,13 +652,15 @@ static int check_events(const char *str, unsigned int i)
 	int n;
 
 	n = strlen(event_symbols[i].symbol);
-	if (!strncmp(str, event_symbols[i].symbol, n))
+	if (!strncasecmp(str, event_symbols[i].symbol, n))
 		return n;
 
 	n = strlen(event_symbols[i].alias);
-	if (n)
-		if (!strncmp(str, event_symbols[i].alias, n))
+	if (n) {
+		if (!strncasecmp(str, event_symbols[i].alias, n))
 			return n;
+	}
+
 	return 0;
 }
 
@@ -718,15 +724,22 @@ parse_numeric_event(const char **strp, struct perf_event_attr *attr)
 	return EVT_FAILED;
 }
 
-static enum event_result
+static int
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
 	int exclude = 0;
 	int eu = 0, ek = 0, eh = 0, precise = 0;
 
-	if (*str++ != ':')
+	if (!*str)
+		return 0;
+
+	if (*str == ',')
 		return 0;
+
+	if (*str++ != ':')
+		return -1;
+
 	while (*str) {
 		if (*str == 'u') {
 			if (!exclude)
@@ -747,14 +760,16 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 
 		++str;
 	}
-	if (str >= *strp + 2) {
-		*strp = str;
-		attr->exclude_user   = eu;
-		attr->exclude_kernel = ek;
-		attr->exclude_hv     = eh;
-		attr->precise_ip     = precise;
-		return 1;
-	}
+	if (str < *strp + 2)
+		return -1;
+
+	*strp = str;
+
+	attr->exclude_user   = eu;
+	attr->exclude_kernel = ek;
+	attr->exclude_hv     = eh;
+	attr->precise_ip     = precise;
+
 	return 0;
 }
 
@@ -797,7 +812,12 @@ parse_event_symbols(const struct option *opt, const char **str,
 	return EVT_FAILED;
 
 modifier:
-	parse_event_modifier(str, attr);
+	if (parse_event_modifier(str, attr) < 0) {
+		fprintf(stderr, "invalid event modifier: '%s'\n", *str);
+		fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n");
+
+		return EVT_FAILED;
+	}
 
 	return ret;
 }
@@ -912,7 +932,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
 
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent.d_name, evt_dirent.d_name);
-			printf("  %-42s [%s]\n", evt_path,
+			printf("  %-50s [%s]\n", evt_path,
 				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
 		}
 		closedir(evt_dir);
@@ -977,7 +997,7 @@ void print_events_type(u8 type)
 		else
 			snprintf(name, sizeof(name), "%s", syms->symbol);
 
-		printf("  %-42s [%s]\n", name,
+		printf("  %-50s [%s]\n", name,
 			event_type_descriptors[type]);
 	}
 }
@@ -995,11 +1015,10 @@ int print_hwcache_events(const char *event_glob)
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
 				char *name = event_cache_name(type, op, i);
 
-				if (event_glob != NULL && 
-				    !strglobmatch(name, event_glob))
+				if (event_glob != NULL && !strglobmatch(name, event_glob))
 					continue;
 
-				printf("  %-42s [%s]\n", name,
+				printf("  %-50s [%s]\n", name,
 					event_type_descriptors[PERF_TYPE_HW_CACHE]);
 				++printed;
 			}
@@ -1009,14 +1028,16 @@ int print_hwcache_events(const char *event_glob)
 	return printed;
 }
 
+#define MAX_NAME_LEN 100
+
 /*
  * Print the help text for the event symbols:
  */
 void print_events(const char *event_glob)
 {
-	struct event_symbol *syms = event_symbols;
 	unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
-	char name[40];
+	struct event_symbol *syms = event_symbols;
+	char name[MAX_NAME_LEN];
 
 	printf("\n");
 	printf("List of pre-defined events (to be used in -e):\n");
@@ -1036,10 +1057,10 @@ void print_events(const char *event_glob)
 			continue;
 
 		if (strlen(syms->alias))
-			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
 		else
-			strcpy(name, syms->symbol);
-		printf("  %-42s [%s]\n", name,
+			strncpy(name, syms->symbol, MAX_NAME_LEN);
+		printf("  %-50s [%s]\n", name,
 			event_type_descriptors[type]);
 
 		prev_type = type;
@@ -1056,12 +1077,12 @@ void print_events(const char *event_glob)
 		return;
 
 	printf("\n");
-	printf("  %-42s [%s]\n",
+	printf("  %-50s [%s]\n",
 		"rNNN (see 'perf list --help' on how to encode it)",
 	       event_type_descriptors[PERF_TYPE_RAW]);
 	printf("\n");
 
-	printf("  %-42s [%s]\n",
+	printf("  %-50s [%s]\n",
 			"mem:<addr>[:access]",
 			event_type_descriptors[PERF_TYPE_BREAKPOINT]);
 	printf("\n");
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b7c85ce466a1..3b9d0b800d5c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1471,6 +1471,38 @@ static int find_probe_point_by_func(struct probe_finder *pf)
 	return _param.retval;
 }
 
+struct pubname_callback_param {
+	char *function;
+	char *file;
+	Dwarf_Die *cu_die;
+	Dwarf_Die *sp_die;
+	int found;
+};
+
+static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
+{
+	struct pubname_callback_param *param = data;
+
+	if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
+		if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
+			return DWARF_CB_OK;
+
+		if (die_compare_name(param->sp_die, param->function)) {
+			if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
+				return DWARF_CB_OK;
+
+			if (param->file &&
+			    strtailcmp(param->file, dwarf_decl_file(param->sp_die)))
+				return DWARF_CB_OK;
+
+			param->found = 1;
+			return DWARF_CB_ABORT;
+		}
+	}
+
+	return DWARF_CB_OK;
+}
+
 /* Find probe points from debuginfo */
 static int find_probes(int fd, struct probe_finder *pf)
 {
@@ -1498,6 +1530,28 @@ static int find_probes(int fd, struct probe_finder *pf)
 
 	off = 0;
 	line_list__init(&pf->lcache);
+
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (pp->function) {
+		struct pubname_callback_param pubname_param = {
+			.function = pp->function,
+			.file	  = pp->file,
+			.cu_die	  = &pf->cu_die,
+			.sp_die	  = &pf->sp_die,
+			.found	  = 0,
+		};
+		struct dwarf_callback_param probe_param = {
+			.data = pf,
+		};
+
+		dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0);
+		if (pubname_param.found) {
+			ret = probe_point_search_cb(&pf->sp_die, &probe_param);
+			if (ret)
+				goto found;
+		}
+	}
+
 	/* Loop on CUs (Compilation Unit) */
 	while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
 		/* Get the DIE(Debugging Information Entry) of this CU */
@@ -1525,6 +1579,8 @@ static int find_probes(int fd, struct probe_finder *pf)
 		}
 		off = noff;
 	}
+
+found:
 	line_list__free(&pf->lcache);
 	if (dwfl)
 		dwfl_end(dwfl);
@@ -1946,6 +2002,22 @@ int find_line_range(int fd, struct line_range *lr)
 		return -EBADF;
 	}
 
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (lr->function) {
+		struct pubname_callback_param pubname_param = {
+			.function = lr->function, .file = lr->file,
+			.cu_die = &lf.cu_die, .sp_die = &lf.sp_die, .found = 0};
+		struct dwarf_callback_param line_range_param = {
+			.data = (void *)&lf, .retval = 0};
+
+		dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0);
+		if (pubname_param.found) {
+			line_range_search_cb(&lf.sp_die, &line_range_param);
+			if (lf.found)
+				goto found;
+		}
+	}
+
 	/* Loop on CUs (Compilation Unit) */
 	while (!lf.found && ret >= 0) {
 		if (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0)
@@ -1974,6 +2046,7 @@ int find_line_range(int fd, struct line_range *lr)
 		off = noff;
 	}
 
+found:
 	/* Store comp_dir */
 	if (lf.found) {
 		comp_dir = cu_get_comp_dir(&lf.cu_die);
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index beaefc3c1223..605730a366db 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -49,6 +49,7 @@ struct probe_finder {
 	Dwarf_Addr		addr;		/* Address */
 	const char		*fname;		/* Real file name */
 	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;
 	struct list_head	lcache;		/* Line cache for lazy match */
 
 	/* For variable searching */
@@ -83,6 +84,7 @@ struct line_finder {
 	int			lno_s;		/* Start line number */
 	int			lno_e;		/* End line number */
 	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;
 	int			found;
 };
 
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index a9f2d7e1204d..b5c7d818001c 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -498,11 +498,11 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
 	struct cpu_map *cpus = NULL;
 	struct thread_map *threads = NULL;
 	PyObject *pcpus = NULL, *pthreads = NULL;
-	int group = 0, overwrite = 0;
-	static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
+	int group = 0, inherit = 0;
+	static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL};
 
 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
-					 &pcpus, &pthreads, &group, &overwrite))
+					 &pcpus, &pthreads, &group, &inherit))
 		return NULL;
 
 	if (pthreads != NULL)
@@ -511,7 +511,8 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
 	if (pcpus != NULL)
 		cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
 
-	if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) {
+	evsel->attr.inherit = inherit;
+	if (perf_evsel__open(evsel, cpus, threads, group) < 0) {
 		PyErr_SetFromErrno(PyExc_OSError);
 		return NULL;
 	}
@@ -679,7 +680,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 					 &cpu, &sample_id_all))
 		return NULL;
 
-	event = perf_evlist__read_on_cpu(evlist, cpu);
+	event = perf_evlist__mmap_read(evlist, cpu);
 	if (event != NULL) {
 		struct perf_evsel *first;
 		PyObject *pyevent = pyrf_event__new(event);
@@ -809,6 +810,9 @@ static struct {
 	{ "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
 	{ "COUNT_HW_CACHE_RESULT_MISS",   PERF_COUNT_HW_CACHE_RESULT_MISS },
 
+	{ "COUNT_HW_STALLED_CYCLES_FRONTEND",	  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+	{ "COUNT_HW_STALLED_CYCLES_BACKEND",	  PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+
 	{ "COUNT_SW_CPU_CLOCK",	       PERF_COUNT_SW_CPU_CLOCK },
 	{ "COUNT_SW_TASK_CLOCK",       PERF_COUNT_SW_TASK_CLOCK },
 	{ "COUNT_SW_PAGE_FAULTS",      PERF_COUNT_SW_PAGE_FAULTS },
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index caa224522fea..fff66741f18d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1156,6 +1156,18 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
 	return ret;
 }
 
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+					      unsigned int type)
+{
+	struct perf_evsel *pos;
+
+	list_for_each_entry(pos, &session->evlist->entries, node) {
+		if (pos->attr.type == type)
+			return pos;
+	}
+	return NULL;
+}
+
 void perf_session__print_symbols(union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_session *session)
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 1ac481fc1100..8daaa2d15396 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -162,6 +162,9 @@ static inline int perf_session__parse_sample(struct perf_session *session,
 					session->sample_id_all, sample);
 }
 
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+					    unsigned int type);
+
 void perf_session__print_symbols(union perf_event *event,
 				 struct perf_sample *sample,
 				 struct perf_session *session);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f06c10f092ba..516876dfbe52 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -31,13 +31,13 @@
 #define NT_GNU_BUILD_ID 3
 #endif
 
-static bool dso__build_id_equal(const struct dso *self, u8 *build_id);
+static bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
 static int elf_read_build_id(Elf *elf, void *bf, size_t size);
 static void dsos__add(struct list_head *head, struct dso *dso);
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
-static int dso__load_kernel_sym(struct dso *self, struct map *map,
+static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter);
-static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 			symbol_filter_t filter);
 static int vmlinux_path__nr_entries;
 static char **vmlinux_path;
@@ -49,27 +49,27 @@ struct symbol_conf symbol_conf = {
 	.symfs            = "",
 };
 
-int dso__name_len(const struct dso *self)
+int dso__name_len(const struct dso *dso)
 {
 	if (verbose)
-		return self->long_name_len;
+		return dso->long_name_len;
 
-	return self->short_name_len;
+	return dso->short_name_len;
 }
 
-bool dso__loaded(const struct dso *self, enum map_type type)
+bool dso__loaded(const struct dso *dso, enum map_type type)
 {
-	return self->loaded & (1 << type);
+	return dso->loaded & (1 << type);
 }
 
-bool dso__sorted_by_name(const struct dso *self, enum map_type type)
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
 {
-	return self->sorted_by_name & (1 << type);
+	return dso->sorted_by_name & (1 << type);
 }
 
-static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
+static void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
 {
-	self->sorted_by_name |= (1 << type);
+	dso->sorted_by_name |= (1 << type);
 }
 
 bool symbol_type__is_a(char symbol_type, enum map_type map_type)
@@ -84,9 +84,9 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type)
 	}
 }
 
-static void symbols__fixup_end(struct rb_root *self)
+static void symbols__fixup_end(struct rb_root *symbols)
 {
-	struct rb_node *nd, *prevnd = rb_first(self);
+	struct rb_node *nd, *prevnd = rb_first(symbols);
 	struct symbol *curr, *prev;
 
 	if (prevnd == NULL)
@@ -107,10 +107,10 @@ static void symbols__fixup_end(struct rb_root *self)
 		curr->end = roundup(curr->start, 4096);
 }
 
-static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
+static void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 {
 	struct map *prev, *curr;
-	struct rb_node *nd, *prevnd = rb_first(&self->maps[type]);
+	struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]);
 
 	if (prevnd == NULL)
 		return;
@@ -130,128 +130,128 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
 	curr->end = ~0ULL;
 }
 
-static void map_groups__fixup_end(struct map_groups *self)
+static void map_groups__fixup_end(struct map_groups *mg)
 {
 	int i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		__map_groups__fixup_end(self, i);
+		__map_groups__fixup_end(mg, i);
 }
 
 static struct symbol *symbol__new(u64 start, u64 len, u8 binding,
 				  const char *name)
 {
 	size_t namelen = strlen(name) + 1;
-	struct symbol *self = calloc(1, (symbol_conf.priv_size +
-					 sizeof(*self) + namelen));
-	if (self == NULL)
+	struct symbol *sym = calloc(1, (symbol_conf.priv_size +
+					sizeof(*sym) + namelen));
+	if (sym == NULL)
 		return NULL;
 
 	if (symbol_conf.priv_size)
-		self = ((void *)self) + symbol_conf.priv_size;
-
-	self->start   = start;
-	self->end     = len ? start + len - 1 : start;
-	self->binding = binding;
-	self->namelen = namelen - 1;
+		sym = ((void *)sym) + symbol_conf.priv_size;
 
-	pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n", __func__, name, start, self->end);
+	sym->start   = start;
+	sym->end     = len ? start + len - 1 : start;
+	sym->binding = binding;
+	sym->namelen = namelen - 1;
 
-	memcpy(self->name, name, namelen);
+	pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n",
+		  __func__, name, start, sym->end);
+	memcpy(sym->name, name, namelen);
 
-	return self;
+	return sym;
 }
 
-void symbol__delete(struct symbol *self)
+void symbol__delete(struct symbol *sym)
 {
-	free(((void *)self) - symbol_conf.priv_size);
+	free(((void *)sym) - symbol_conf.priv_size);
 }
 
-static size_t symbol__fprintf(struct symbol *self, FILE *fp)
+static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
 {
 	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
-		       self->start, self->end,
-		       self->binding == STB_GLOBAL ? 'g' :
-		       self->binding == STB_LOCAL  ? 'l' : 'w',
-		       self->name);
+		       sym->start, sym->end,
+		       sym->binding == STB_GLOBAL ? 'g' :
+		       sym->binding == STB_LOCAL  ? 'l' : 'w',
+		       sym->name);
 }
 
-void dso__set_long_name(struct dso *self, char *name)
+void dso__set_long_name(struct dso *dso, char *name)
 {
 	if (name == NULL)
 		return;
-	self->long_name = name;
-	self->long_name_len = strlen(name);
+	dso->long_name = name;
+	dso->long_name_len = strlen(name);
 }
 
-static void dso__set_short_name(struct dso *self, const char *name)
+static void dso__set_short_name(struct dso *dso, const char *name)
 {
 	if (name == NULL)
 		return;
-	self->short_name = name;
-	self->short_name_len = strlen(name);
+	dso->short_name = name;
+	dso->short_name_len = strlen(name);
 }
 
-static void dso__set_basename(struct dso *self)
+static void dso__set_basename(struct dso *dso)
 {
-	dso__set_short_name(self, basename(self->long_name));
+	dso__set_short_name(dso, basename(dso->long_name));
 }
 
 struct dso *dso__new(const char *name)
 {
-	struct dso *self = calloc(1, sizeof(*self) + strlen(name) + 1);
+	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
 
-	if (self != NULL) {
+	if (dso != NULL) {
 		int i;
-		strcpy(self->name, name);
-		dso__set_long_name(self, self->name);
-		dso__set_short_name(self, self->name);
+		strcpy(dso->name, name);
+		dso__set_long_name(dso, dso->name);
+		dso__set_short_name(dso, dso->name);
 		for (i = 0; i < MAP__NR_TYPES; ++i)
-			self->symbols[i] = self->symbol_names[i] = RB_ROOT;
-		self->symtab_type = SYMTAB__NOT_FOUND;
-		self->loaded = 0;
-		self->sorted_by_name = 0;
-		self->has_build_id = 0;
-		self->kernel = DSO_TYPE_USER;
-		INIT_LIST_HEAD(&self->node);
+			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+		dso->symtab_type = SYMTAB__NOT_FOUND;
+		dso->loaded = 0;
+		dso->sorted_by_name = 0;
+		dso->has_build_id = 0;
+		dso->kernel = DSO_TYPE_USER;
+		INIT_LIST_HEAD(&dso->node);
 	}
 
-	return self;
+	return dso;
 }
 
-static void symbols__delete(struct rb_root *self)
+static void symbols__delete(struct rb_root *symbols)
 {
 	struct symbol *pos;
-	struct rb_node *next = rb_first(self);
+	struct rb_node *next = rb_first(symbols);
 
 	while (next) {
 		pos = rb_entry(next, struct symbol, rb_node);
 		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, self);
+		rb_erase(&pos->rb_node, symbols);
 		symbol__delete(pos);
 	}
 }
 
-void dso__delete(struct dso *self)
+void dso__delete(struct dso *dso)
 {
 	int i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		symbols__delete(&self->symbols[i]);
-	if (self->sname_alloc)
-		free((char *)self->short_name);
-	if (self->lname_alloc)
-		free(self->long_name);
-	free(self);
+		symbols__delete(&dso->symbols[i]);
+	if (dso->sname_alloc)
+		free((char *)dso->short_name);
+	if (dso->lname_alloc)
+		free(dso->long_name);
+	free(dso);
 }
 
-void dso__set_build_id(struct dso *self, void *build_id)
+void dso__set_build_id(struct dso *dso, void *build_id)
 {
-	memcpy(self->build_id, build_id, sizeof(self->build_id));
-	self->has_build_id = 1;
+	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
+	dso->has_build_id = 1;
 }
 
-static void symbols__insert(struct rb_root *self, struct symbol *sym)
+static void symbols__insert(struct rb_root *symbols, struct symbol *sym)
 {
-	struct rb_node **p = &self->rb_node;
+	struct rb_node **p = &symbols->rb_node;
 	struct rb_node *parent = NULL;
 	const u64 ip = sym->start;
 	struct symbol *s;
@@ -265,17 +265,17 @@ static void symbols__insert(struct rb_root *self, struct symbol *sym)
 			p = &(*p)->rb_right;
 	}
 	rb_link_node(&sym->rb_node, parent, p);
-	rb_insert_color(&sym->rb_node, self);
+	rb_insert_color(&sym->rb_node, symbols);
 }
 
-static struct symbol *symbols__find(struct rb_root *self, u64 ip)
+static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
 {
 	struct rb_node *n;
 
-	if (self == NULL)
+	if (symbols == NULL)
 		return NULL;
 
-	n = self->rb_node;
+	n = symbols->rb_node;
 
 	while (n) {
 		struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -296,9 +296,9 @@ struct symbol_name_rb_node {
 	struct symbol	sym;
 };
 
-static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
+static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
 {
-	struct rb_node **p = &self->rb_node;
+	struct rb_node **p = &symbols->rb_node;
 	struct rb_node *parent = NULL;
 	struct symbol_name_rb_node *symn, *s;
 
@@ -313,27 +313,29 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
 			p = &(*p)->rb_right;
 	}
 	rb_link_node(&symn->rb_node, parent, p);
-	rb_insert_color(&symn->rb_node, self);
+	rb_insert_color(&symn->rb_node, symbols);
 }
 
-static void symbols__sort_by_name(struct rb_root *self, struct rb_root *source)
+static void symbols__sort_by_name(struct rb_root *symbols,
+				  struct rb_root *source)
 {
 	struct rb_node *nd;
 
 	for (nd = rb_first(source); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
-		symbols__insert_by_name(self, pos);
+		symbols__insert_by_name(symbols, pos);
 	}
 }
 
-static struct symbol *symbols__find_by_name(struct rb_root *self, const char *name)
+static struct symbol *symbols__find_by_name(struct rb_root *symbols,
+					    const char *name)
 {
 	struct rb_node *n;
 
-	if (self == NULL)
+	if (symbols == NULL)
 		return NULL;
 
-	n = self->rb_node;
+	n = symbols->rb_node;
 
 	while (n) {
 		struct symbol_name_rb_node *s;
@@ -353,29 +355,29 @@ static struct symbol *symbols__find_by_name(struct rb_root *self, const char *na
 	return NULL;
 }
 
-struct symbol *dso__find_symbol(struct dso *self,
+struct symbol *dso__find_symbol(struct dso *dso,
 				enum map_type type, u64 addr)
 {
-	return symbols__find(&self->symbols[type], addr);
+	return symbols__find(&dso->symbols[type], addr);
 }
 
-struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
+struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name)
 {
-	return symbols__find_by_name(&self->symbol_names[type], name);
+	return symbols__find_by_name(&dso->symbol_names[type], name);
 }
 
-void dso__sort_by_name(struct dso *self, enum map_type type)
+void dso__sort_by_name(struct dso *dso, enum map_type type)
 {
-	dso__set_sorted_by_name(self, type);
-	return symbols__sort_by_name(&self->symbol_names[type],
-				     &self->symbols[type]);
+	dso__set_sorted_by_name(dso, type);
+	return symbols__sort_by_name(&dso->symbol_names[type],
+				     &dso->symbols[type]);
 }
 
-int build_id__sprintf(const u8 *self, int len, char *bf)
+int build_id__sprintf(const u8 *build_id, int len, char *bf)
 {
 	char *bid = bf;
-	const u8 *raw = self;
+	const u8 *raw = build_id;
 	int i;
 
 	for (i = 0; i < len; ++i) {
@@ -384,24 +386,25 @@ int build_id__sprintf(const u8 *self, int len, char *bf)
 		bid += 2;
 	}
 
-	return raw - self;
+	return raw - build_id;
 }
 
-size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
 {
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
-	build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
 	return fprintf(fp, "%s", sbuild_id);
 }
 
-size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp)
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp)
 {
 	size_t ret = 0;
 	struct rb_node *nd;
 	struct symbol_name_rb_node *pos;
 
-	for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
 		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
 		fprintf(fp, "%s\n", pos->sym.name);
 	}
@@ -409,18 +412,18 @@ size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *
 	return ret;
 }
 
-size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
 {
 	struct rb_node *nd;
-	size_t ret = fprintf(fp, "dso: %s (", self->short_name);
+	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
 
-	if (self->short_name != self->long_name)
-		ret += fprintf(fp, "%s, ", self->long_name);
+	if (dso->short_name != dso->long_name)
+		ret += fprintf(fp, "%s, ", dso->long_name);
 	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
-		       self->loaded ? "" : "NOT ");
-	ret += dso__fprintf_buildid(self, fp);
+		       dso->loaded ? "" : "NOT ");
+	ret += dso__fprintf_buildid(dso, fp);
 	ret += fprintf(fp, ")\n");
-	for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 		ret += symbol__fprintf(pos, fp);
 	}
@@ -543,10 +546,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
  * so that we can in the next step set the symbol ->end address and then
  * call kernel_maps__split_kallsyms.
  */
-static int dso__load_all_kallsyms(struct dso *self, const char *filename,
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
 				  struct map *map)
 {
-	struct process_kallsyms_args args = { .map = map, .dso = self, };
+	struct process_kallsyms_args args = { .map = map, .dso = dso, };
 	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
 }
 
@@ -555,7 +558,7 @@ static int dso__load_all_kallsyms(struct dso *self, const char *filename,
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int dso__split_kallsyms(struct dso *self, struct map *map,
+static int dso__split_kallsyms(struct dso *dso, struct map *map,
 			       symbol_filter_t filter)
 {
 	struct map_groups *kmaps = map__kmap(map)->kmaps;
@@ -563,7 +566,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 	struct map *curr_map = map;
 	struct symbol *pos;
 	int count = 0, moved = 0;	
-	struct rb_root *root = &self->symbols[map->type];
+	struct rb_root *root = &dso->symbols[map->type];
 	struct rb_node *next = rb_first(root);
 	int kernel_range = 0;
 
@@ -582,7 +585,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 
 			if (strcmp(curr_map->dso->short_name, module)) {
 				if (curr_map != map &&
-				    self->kernel == DSO_TYPE_GUEST_KERNEL &&
+				    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 				    machine__is_default_guest(machine)) {
 					/*
 					 * We assume all symbols of a module are
@@ -618,14 +621,14 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 			pos->end   = curr_map->map_ip(curr_map, pos->end);
 		} else if (curr_map != map) {
 			char dso_name[PATH_MAX];
-			struct dso *dso;
+			struct dso *ndso;
 
 			if (count == 0) {
 				curr_map = map;
 				goto filter_symbol;
 			}
 
-			if (self->kernel == DSO_TYPE_GUEST_KERNEL)
+			if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 				snprintf(dso_name, sizeof(dso_name),
 					"[guest.kernel].%d",
 					kernel_range++);
@@ -634,15 +637,15 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 					"[kernel].%d",
 					kernel_range++);
 
-			dso = dso__new(dso_name);
-			if (dso == NULL)
+			ndso = dso__new(dso_name);
+			if (ndso == NULL)
 				return -1;
 
-			dso->kernel = self->kernel;
+			ndso->kernel = dso->kernel;
 
-			curr_map = map__new2(pos->start, dso, map->type);
+			curr_map = map__new2(pos->start, ndso, map->type);
 			if (curr_map == NULL) {
-				dso__delete(dso);
+				dso__delete(ndso);
 				return -1;
 			}
 
@@ -665,7 +668,7 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 	}
 
 	if (curr_map != map &&
-	    self->kernel == DSO_TYPE_GUEST_KERNEL &&
+	    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 	    machine__is_default_guest(kmaps->machine)) {
 		dso__set_loaded(curr_map->dso, curr_map->type);
 	}
@@ -673,21 +676,21 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 	return count + moved;
 }
 
-int dso__load_kallsyms(struct dso *self, const char *filename,
+int dso__load_kallsyms(struct dso *dso, const char *filename,
 		       struct map *map, symbol_filter_t filter)
 {
-	if (dso__load_all_kallsyms(self, filename, map) < 0)
+	if (dso__load_all_kallsyms(dso, filename, map) < 0)
 		return -1;
 
-	if (self->kernel == DSO_TYPE_GUEST_KERNEL)
-		self->symtab_type = SYMTAB__GUEST_KALLSYMS;
+	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		dso->symtab_type = SYMTAB__GUEST_KALLSYMS;
 	else
-		self->symtab_type = SYMTAB__KALLSYMS;
+		dso->symtab_type = SYMTAB__KALLSYMS;
 
-	return dso__split_kallsyms(self, map, filter);
+	return dso__split_kallsyms(dso, map, filter);
 }
 
-static int dso__load_perf_map(struct dso *self, struct map *map,
+static int dso__load_perf_map(struct dso *dso, struct map *map,
 			      symbol_filter_t filter)
 {
 	char *line = NULL;
@@ -695,7 +698,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map,
 	FILE *file;
 	int nr_syms = 0;
 
-	file = fopen(self->long_name, "r");
+	file = fopen(dso->long_name, "r");
 	if (file == NULL)
 		goto out_failure;
 
@@ -733,7 +736,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map,
 		if (filter && filter(map, sym))
 			symbol__delete(sym);
 		else {
-			symbols__insert(&self->symbols[map->type], sym);
+			symbols__insert(&dso->symbols[map->type], sym);
 			nr_syms++;
 		}
 	}
@@ -752,7 +755,7 @@ out_failure:
 /**
  * elf_symtab__for_each_symbol - iterate thru all the symbols
  *
- * @self: struct elf_symtab instance to iterate
+ * @syms: struct elf_symtab instance to iterate
  * @idx: uint32_t idx
  * @sym: GElf_Sym iterator
  */
@@ -852,7 +855,7 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
  * And always look at the original dso, not at debuginfo packages, that
  * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
  */
-static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
+static int dso__synthesize_plt_symbols(struct  dso *dso, struct map *map,
 				       symbol_filter_t filter)
 {
 	uint32_t nr_rel_entries, idx;
@@ -871,7 +874,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
 	char name[PATH_MAX];
 
 	snprintf(name, sizeof(name), "%s%s",
-		 symbol_conf.symfs, self->long_name);
+		 symbol_conf.symfs, dso->long_name);
 	fd = open(name, O_RDONLY);
 	if (fd < 0)
 		goto out;
@@ -947,7 +950,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
 			if (filter && filter(map, f))
 				symbol__delete(f);
 			else {
-				symbols__insert(&self->symbols[map->type], f);
+				symbols__insert(&dso->symbols[map->type], f);
 				++nr;
 			}
 		}
@@ -969,7 +972,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
 			if (filter && filter(map, f))
 				symbol__delete(f);
 			else {
-				symbols__insert(&self->symbols[map->type], f);
+				symbols__insert(&dso->symbols[map->type], f);
 				++nr;
 			}
 		}
@@ -985,29 +988,30 @@ out_close:
 		return nr;
 out:
 	pr_debug("%s: problems reading %s PLT info.\n",
-		 __func__, self->long_name);
+		 __func__, dso->long_name);
 	return 0;
 }
 
-static bool elf_sym__is_a(GElf_Sym *self, enum map_type type)
+static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
 {
 	switch (type) {
 	case MAP__FUNCTION:
-		return elf_sym__is_function(self);
+		return elf_sym__is_function(sym);
 	case MAP__VARIABLE:
-		return elf_sym__is_object(self);
+		return elf_sym__is_object(sym);
 	default:
 		return false;
 	}
 }
 
-static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type)
+static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs,
+			  enum map_type type)
 {
 	switch (type) {
 	case MAP__FUNCTION:
-		return elf_sec__is_text(self, secstrs);
+		return elf_sec__is_text(shdr, secstrs);
 	case MAP__VARIABLE:
-		return elf_sec__is_data(self, secstrs);
+		return elf_sec__is_data(shdr, secstrs);
 	default:
 		return false;
 	}
@@ -1032,13 +1036,13 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
 	return -1;
 }
 
-static int dso__load_sym(struct dso *self, struct map *map, const char *name,
+static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
 			 int fd, symbol_filter_t filter, int kmodule,
 			 int want_symtab)
 {
-	struct kmap *kmap = self->kernel ? map__kmap(map) : NULL;
+	struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
 	struct map *curr_map = map;
-	struct dso *curr_dso = self;
+	struct dso *curr_dso = dso;
 	Elf_Data *symstrs, *secstrs;
 	uint32_t nr_syms;
 	int err = -1;
@@ -1064,14 +1068,14 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 	}
 
 	/* Always reject images with a mismatched build-id: */
-	if (self->has_build_id) {
+	if (dso->has_build_id) {
 		u8 build_id[BUILD_ID_SIZE];
 
 		if (elf_read_build_id(elf, build_id,
 				      BUILD_ID_SIZE) != BUILD_ID_SIZE)
 			goto out_elf_end;
 
-		if (!dso__build_id_equal(self, build_id))
+		if (!dso__build_id_equal(dso, build_id))
 			goto out_elf_end;
 	}
 
@@ -1112,13 +1116,14 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	memset(&sym, 0, sizeof(sym));
-	if (self->kernel == DSO_TYPE_USER) {
-		self->adjust_symbols = (ehdr.e_type == ET_EXEC ||
+	if (dso->kernel == DSO_TYPE_USER) {
+		dso->adjust_symbols = (ehdr.e_type == ET_EXEC ||
 				elf_section_by_name(elf, &ehdr, &shdr,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
-	} else self->adjust_symbols = 0;
-
+	} else {
+		dso->adjust_symbols = 0;
+	}
 	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
 		struct symbol *f;
 		const char *elf_name = elf_sym__name(&sym, symstrs);
@@ -1168,22 +1173,22 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		    (sym.st_value & 1))
 			--sym.st_value;
 
-		if (self->kernel != DSO_TYPE_USER || kmodule) {
+		if (dso->kernel != DSO_TYPE_USER || kmodule) {
 			char dso_name[PATH_MAX];
 
 			if (strcmp(section_name,
 				   (curr_dso->short_name +
-				    self->short_name_len)) == 0)
+				    dso->short_name_len)) == 0)
 				goto new_symbol;
 
 			if (strcmp(section_name, ".text") == 0) {
 				curr_map = map;
-				curr_dso = self;
+				curr_dso = dso;
 				goto new_symbol;
 			}
 
 			snprintf(dso_name, sizeof(dso_name),
-				 "%s%s", self->short_name, section_name);
+				 "%s%s", dso->short_name, section_name);
 
 			curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name);
 			if (curr_map == NULL) {
@@ -1195,9 +1200,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 				curr_dso = dso__new(dso_name);
 				if (curr_dso == NULL)
 					goto out_elf_end;
-				curr_dso->kernel = self->kernel;
-				curr_dso->long_name = self->long_name;
-				curr_dso->long_name_len = self->long_name_len;
+				curr_dso->kernel = dso->kernel;
+				curr_dso->long_name = dso->long_name;
+				curr_dso->long_name_len = dso->long_name_len;
 				curr_map = map__new2(start, curr_dso,
 						     map->type);
 				if (curr_map == NULL) {
@@ -1206,9 +1211,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 				}
 				curr_map->map_ip = identity__map_ip;
 				curr_map->unmap_ip = identity__map_ip;
-				curr_dso->symtab_type = self->symtab_type;
+				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
-				dsos__add(&self->node, curr_dso);
+				dsos__add(&dso->node, curr_dso);
 				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;
@@ -1250,7 +1255,7 @@ new_symbol:
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		symbols__fixup_end(&self->symbols[map->type]);
+		symbols__fixup_end(&dso->symbols[map->type]);
 		if (kmap) {
 			/*
 			 * We need to fixup this here too because we create new
@@ -1266,9 +1271,9 @@ out_close:
 	return err;
 }
 
-static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
+static bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
 {
-	return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
+	return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
 }
 
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
@@ -1429,7 +1434,7 @@ out:
 	return err;
 }
 
-char dso__symtab_origin(const struct dso *self)
+char dso__symtab_origin(const struct dso *dso)
 {
 	static const char origin[] = {
 		[SYMTAB__KALLSYMS]	      = 'k',
@@ -1444,12 +1449,12 @@ char dso__symtab_origin(const struct dso *self)
 		[SYMTAB__GUEST_KMODULE]	      =  'G',
 	};
 
-	if (self == NULL || self->symtab_type == SYMTAB__NOT_FOUND)
+	if (dso == NULL || dso->symtab_type == SYMTAB__NOT_FOUND)
 		return '!';
-	return origin[self->symtab_type];
+	return origin[dso->symtab_type];
 }
 
-int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
+int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 {
 	int size = PATH_MAX;
 	char *name;
@@ -1459,12 +1464,12 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 	const char *root_dir;
 	int want_symtab;
 
-	dso__set_loaded(self, map->type);
+	dso__set_loaded(dso, map->type);
 
-	if (self->kernel == DSO_TYPE_KERNEL)
-		return dso__load_kernel_sym(self, map, filter);
-	else if (self->kernel == DSO_TYPE_GUEST_KERNEL)
-		return dso__load_guest_kernel_sym(self, map, filter);
+	if (dso->kernel == DSO_TYPE_KERNEL)
+		return dso__load_kernel_sym(dso, map, filter);
+	else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		return dso__load_guest_kernel_sym(dso, map, filter);
 
 	if (map->groups && map->groups->machine)
 		machine = map->groups->machine;
@@ -1475,11 +1480,11 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 	if (!name)
 		return -1;
 
-	self->adjust_symbols = 0;
+	dso->adjust_symbols = 0;
 
-	if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
-		ret = dso__load_perf_map(self, map, filter);
-		self->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT :
+	if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
+		ret = dso__load_perf_map(dso, map, filter);
+		dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT :
 					      SYMTAB__NOT_FOUND;
 		return ret;
 	}
@@ -1490,33 +1495,33 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 	 */
 	want_symtab = 1;
 restart:
-	for (self->symtab_type = SYMTAB__BUILD_ID_CACHE;
-	     self->symtab_type != SYMTAB__NOT_FOUND;
-	     self->symtab_type++) {
-		switch (self->symtab_type) {
+	for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE;
+	     dso->symtab_type != SYMTAB__NOT_FOUND;
+	     dso->symtab_type++) {
+		switch (dso->symtab_type) {
 		case SYMTAB__BUILD_ID_CACHE:
 			/* skip the locally configured cache if a symfs is given */
 			if (symbol_conf.symfs[0] ||
-			    (dso__build_id_filename(self, name, size) == NULL)) {
+			    (dso__build_id_filename(dso, name, size) == NULL)) {
 				continue;
 			}
 			break;
 		case SYMTAB__FEDORA_DEBUGINFO:
 			snprintf(name, size, "%s/usr/lib/debug%s.debug",
-				 symbol_conf.symfs, self->long_name);
+				 symbol_conf.symfs, dso->long_name);
 			break;
 		case SYMTAB__UBUNTU_DEBUGINFO:
 			snprintf(name, size, "%s/usr/lib/debug%s",
-				 symbol_conf.symfs, self->long_name);
+				 symbol_conf.symfs, dso->long_name);
 			break;
 		case SYMTAB__BUILDID_DEBUGINFO: {
 			char build_id_hex[BUILD_ID_SIZE * 2 + 1];
 
-			if (!self->has_build_id)
+			if (!dso->has_build_id)
 				continue;
 
-			build_id__sprintf(self->build_id,
-					  sizeof(self->build_id),
+			build_id__sprintf(dso->build_id,
+					  sizeof(dso->build_id),
 					  build_id_hex);
 			snprintf(name, size,
 				 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
@@ -1525,7 +1530,7 @@ restart:
 			break;
 		case SYMTAB__SYSTEM_PATH_DSO:
 			snprintf(name, size, "%s%s",
-			     symbol_conf.symfs, self->long_name);
+			     symbol_conf.symfs, dso->long_name);
 			break;
 		case SYMTAB__GUEST_KMODULE:
 			if (map->groups && machine)
@@ -1533,12 +1538,12 @@ restart:
 			else
 				root_dir = "";
 			snprintf(name, size, "%s%s%s", symbol_conf.symfs,
-				 root_dir, self->long_name);
+				 root_dir, dso->long_name);
 			break;
 
 		case SYMTAB__SYSTEM_PATH_KMODULE:
 			snprintf(name, size, "%s%s", symbol_conf.symfs,
-				 self->long_name);
+				 dso->long_name);
 			break;
 		default:;
 		}
@@ -1548,7 +1553,7 @@ restart:
 		if (fd < 0)
 			continue;
 
-		ret = dso__load_sym(self, map, name, fd, filter, 0,
+		ret = dso__load_sym(dso, map, name, fd, filter, 0,
 				    want_symtab);
 		close(fd);
 
@@ -1560,7 +1565,8 @@ restart:
 			continue;
 
 		if (ret > 0) {
-			int nr_plt = dso__synthesize_plt_symbols(self, map, filter);
+			int nr_plt = dso__synthesize_plt_symbols(dso, map,
+								 filter);
 			if (nr_plt > 0)
 				ret += nr_plt;
 			break;
@@ -1577,17 +1583,17 @@ restart:
 	}
 
 	free(name);
-	if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
+	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
 		return 0;
 	return ret;
 }
 
-struct map *map_groups__find_by_name(struct map_groups *self,
+struct map *map_groups__find_by_name(struct map_groups *mg,
 				     enum map_type type, const char *name)
 {
 	struct rb_node *nd;
 
-	for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
 		struct map *map = rb_entry(nd, struct map, rb_node);
 
 		if (map->dso && strcmp(map->dso->short_name, name) == 0)
@@ -1597,28 +1603,28 @@ struct map *map_groups__find_by_name(struct map_groups *self,
 	return NULL;
 }
 
-static int dso__kernel_module_get_build_id(struct dso *self,
-				const char *root_dir)
+static int dso__kernel_module_get_build_id(struct dso *dso,
+					   const char *root_dir)
 {
 	char filename[PATH_MAX];
 	/*
 	 * kernel module short names are of the form "[module]" and
 	 * we need just "module" here.
 	 */
-	const char *name = self->short_name + 1;
+	const char *name = dso->short_name + 1;
 
 	snprintf(filename, sizeof(filename),
 		 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
 		 root_dir, (int)strlen(name) - 1, name);
 
-	if (sysfs__read_build_id(filename, self->build_id,
-				 sizeof(self->build_id)) == 0)
-		self->has_build_id = true;
+	if (sysfs__read_build_id(filename, dso->build_id,
+				 sizeof(dso->build_id)) == 0)
+		dso->has_build_id = true;
 
 	return 0;
 }
 
-static int map_groups__set_modules_path_dir(struct map_groups *self,
+static int map_groups__set_modules_path_dir(struct map_groups *mg,
 				const char *dir_name)
 {
 	struct dirent *dent;
@@ -1646,7 +1652,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *self,
 
 			snprintf(path, sizeof(path), "%s/%s",
 				 dir_name, dent->d_name);
-			ret = map_groups__set_modules_path_dir(self, path);
+			ret = map_groups__set_modules_path_dir(mg, path);
 			if (ret < 0)
 				goto out;
 		} else {
@@ -1661,7 +1667,8 @@ static int map_groups__set_modules_path_dir(struct map_groups *self,
 				 (int)(dot - dent->d_name), dent->d_name);
 
 			strxfrchar(dso_name, '-', '_');
-			map = map_groups__find_by_name(self, MAP__FUNCTION, dso_name);
+			map = map_groups__find_by_name(mg, MAP__FUNCTION,
+						       dso_name);
 			if (map == NULL)
 				continue;
 
@@ -1711,20 +1718,20 @@ static char *get_kernel_version(const char *root_dir)
 	return strdup(name);
 }
 
-static int machine__set_modules_path(struct machine *self)
+static int machine__set_modules_path(struct machine *machine)
 {
 	char *version;
 	char modules_path[PATH_MAX];
 
-	version = get_kernel_version(self->root_dir);
+	version = get_kernel_version(machine->root_dir);
 	if (!version)
 		return -1;
 
 	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
-		 self->root_dir, version);
+		 machine->root_dir, version);
 	free(version);
 
-	return map_groups__set_modules_path_dir(&self->kmaps, modules_path);
+	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
 }
 
 /*
@@ -1734,23 +1741,23 @@ static int machine__set_modules_path(struct machine *self)
  */
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 {
-	struct map *self = calloc(1, (sizeof(*self) +
-				      (dso->kernel ? sizeof(struct kmap) : 0)));
-	if (self != NULL) {
+	struct map *map = calloc(1, (sizeof(*map) +
+				     (dso->kernel ? sizeof(struct kmap) : 0)));
+	if (map != NULL) {
 		/*
 		 * ->end will be filled after we load all the symbols
 		 */
-		map__init(self, type, start, 0, 0, dso);
+		map__init(map, type, start, 0, 0, dso);
 	}
 
-	return self;
+	return map;
 }
 
-struct map *machine__new_module(struct machine *self, u64 start,
+struct map *machine__new_module(struct machine *machine, u64 start,
 				const char *filename)
 {
 	struct map *map;
-	struct dso *dso = __dsos__findnew(&self->kernel_dsos, filename);
+	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
 
 	if (dso == NULL)
 		return NULL;
@@ -1759,15 +1766,15 @@ struct map *machine__new_module(struct machine *self, u64 start,
 	if (map == NULL)
 		return NULL;
 
-	if (machine__is_host(self))
+	if (machine__is_host(machine))
 		dso->symtab_type = SYMTAB__SYSTEM_PATH_KMODULE;
 	else
 		dso->symtab_type = SYMTAB__GUEST_KMODULE;
-	map_groups__insert(&self->kmaps, map);
+	map_groups__insert(&machine->kmaps, map);
 	return map;
 }
 
-static int machine__create_modules(struct machine *self)
+static int machine__create_modules(struct machine *machine)
 {
 	char *line = NULL;
 	size_t n;
@@ -1776,10 +1783,10 @@ static int machine__create_modules(struct machine *self)
 	const char *modules;
 	char path[PATH_MAX];
 
-	if (machine__is_default_guest(self))
+	if (machine__is_default_guest(machine))
 		modules = symbol_conf.default_guest_modules;
 	else {
-		sprintf(path, "%s/proc/modules", self->root_dir);
+		sprintf(path, "%s/proc/modules", machine->root_dir);
 		modules = path;
 	}
 
@@ -1815,16 +1822,16 @@ static int machine__create_modules(struct machine *self)
 		*sep = '\0';
 
 		snprintf(name, sizeof(name), "[%s]", line);
-		map = machine__new_module(self, start, name);
+		map = machine__new_module(machine, start, name);
 		if (map == NULL)
 			goto out_delete_line;
-		dso__kernel_module_get_build_id(map->dso, self->root_dir);
+		dso__kernel_module_get_build_id(map->dso, machine->root_dir);
 	}
 
 	free(line);
 	fclose(file);
 
-	return machine__set_modules_path(self);
+	return machine__set_modules_path(machine);
 
 out_delete_line:
 	free(line);
@@ -1832,7 +1839,7 @@ out_failure:
 	return -1;
 }
 
-int dso__load_vmlinux(struct dso *self, struct map *map,
+int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, symbol_filter_t filter)
 {
 	int err = -1, fd;
@@ -1844,9 +1851,9 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
 	if (fd < 0)
 		return -1;
 
-	dso__set_long_name(self, (char *)vmlinux);
-	dso__set_loaded(self, map->type);
-	err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
+	dso__set_long_name(dso, (char *)vmlinux);
+	dso__set_loaded(dso, map->type);
+	err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0);
 	close(fd);
 
 	if (err > 0)
@@ -1855,7 +1862,7 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
 	return err;
 }
 
-int dso__load_vmlinux_path(struct dso *self, struct map *map,
+int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 			   symbol_filter_t filter)
 {
 	int i, err = 0;
@@ -1864,20 +1871,20 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map,
 	pr_debug("Looking at the vmlinux_path (%d entries long)\n",
 		 vmlinux_path__nr_entries + 1);
 
-	filename = dso__build_id_filename(self, NULL, 0);
+	filename = dso__build_id_filename(dso, NULL, 0);
 	if (filename != NULL) {
-		err = dso__load_vmlinux(self, map, filename, filter);
+		err = dso__load_vmlinux(dso, map, filename, filter);
 		if (err > 0) {
-			dso__set_long_name(self, filename);
+			dso__set_long_name(dso, filename);
 			goto out;
 		}
 		free(filename);
 	}
 
 	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
-		err = dso__load_vmlinux(self, map, vmlinux_path[i], filter);
+		err = dso__load_vmlinux(dso, map, vmlinux_path[i], filter);
 		if (err > 0) {
-			dso__set_long_name(self, strdup(vmlinux_path[i]));
+			dso__set_long_name(dso, strdup(vmlinux_path[i]));
 			break;
 		}
 	}
@@ -1885,7 +1892,7 @@ out:
 	return err;
 }
 
-static int dso__load_kernel_sym(struct dso *self, struct map *map,
+static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter)
 {
 	int err;
@@ -1912,10 +1919,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	}
 
 	if (symbol_conf.vmlinux_name != NULL) {
-		err = dso__load_vmlinux(self, map,
+		err = dso__load_vmlinux(dso, map,
 					symbol_conf.vmlinux_name, filter);
 		if (err > 0) {
-			dso__set_long_name(self,
+			dso__set_long_name(dso,
 					   strdup(symbol_conf.vmlinux_name));
 			goto out_fixup;
 		}
@@ -1923,7 +1930,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	}
 
 	if (vmlinux_path != NULL) {
-		err = dso__load_vmlinux_path(self, map, filter);
+		err = dso__load_vmlinux_path(dso, map, filter);
 		if (err > 0)
 			goto out_fixup;
 	}
@@ -1937,13 +1944,13 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	 * we have a build-id, so check if it is the same as the running kernel,
 	 * using it if it is.
 	 */
-	if (self->has_build_id) {
+	if (dso->has_build_id) {
 		u8 kallsyms_build_id[BUILD_ID_SIZE];
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
 		if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
 					 sizeof(kallsyms_build_id)) == 0) {
-			if (dso__build_id_equal(self, kallsyms_build_id)) {
+			if (dso__build_id_equal(dso, kallsyms_build_id)) {
 				kallsyms_filename = "/proc/kallsyms";
 				goto do_kallsyms;
 			}
@@ -1952,7 +1959,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 		 * Now look if we have it on the build-id cache in
 		 * $HOME/.debug/[kernel.kallsyms].
 		 */
-		build_id__sprintf(self->build_id, sizeof(self->build_id),
+		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
 				  sbuild_id);
 
 		if (asprintf(&kallsyms_allocated_filename,
@@ -1979,7 +1986,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	}
 
 do_kallsyms:
-	err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
+	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
 	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	free(kallsyms_allocated_filename);
@@ -1987,7 +1994,7 @@ do_kallsyms:
 	if (err > 0) {
 out_fixup:
 		if (kallsyms_filename != NULL)
-			dso__set_long_name(self, strdup("[kernel.kallsyms]"));
+			dso__set_long_name(dso, strdup("[kernel.kallsyms]"));
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}
@@ -1995,8 +2002,8 @@ out_fixup:
 	return err;
 }
 
-static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
-				symbol_filter_t filter)
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
+				      symbol_filter_t filter)
 {
 	int err;
 	const char *kallsyms_filename = NULL;
@@ -2016,7 +2023,7 @@ static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
 		 * Or use file guest_kallsyms inputted by user on commandline
 		 */
 		if (symbol_conf.default_guest_vmlinux_name != NULL) {
-			err = dso__load_vmlinux(self, map,
+			err = dso__load_vmlinux(dso, map,
 				symbol_conf.default_guest_vmlinux_name, filter);
 			goto out_try_fixup;
 		}
@@ -2029,7 +2036,7 @@ static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
 		kallsyms_filename = path;
 	}
 
-	err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
+	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
 	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 
@@ -2037,7 +2044,7 @@ out_try_fixup:
 	if (err > 0) {
 		if (kallsyms_filename != NULL) {
 			machine__mmap_name(machine, path, sizeof(path));
-			dso__set_long_name(self, strdup(path));
+			dso__set_long_name(dso, strdup(path));
 		}
 		map__fixup_start(map);
 		map__fixup_end(map);
@@ -2090,12 +2097,12 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp)
 	return ret;
 }
 
-size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp)
+size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp)
 {
 	struct rb_node *nd;
 	size_t ret = 0;
 
-	for (nd = rb_first(self); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
 		ret += __dsos__fprintf(&pos->user_dsos, fp);
@@ -2119,18 +2126,20 @@ static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
 	return ret;
 }
 
-size_t machine__fprintf_dsos_buildid(struct machine *self, FILE *fp, bool with_hits)
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+				     bool with_hits)
 {
-	return __dsos__fprintf_buildid(&self->kernel_dsos, fp, with_hits) +
-	       __dsos__fprintf_buildid(&self->user_dsos, fp, with_hits);
+	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, with_hits) +
+	       __dsos__fprintf_buildid(&machine->user_dsos, fp, with_hits);
 }
 
-size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits)
+size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
+				      FILE *fp, bool with_hits)
 {
 	struct rb_node *nd;
 	size_t ret = 0;
 
-	for (nd = rb_first(self); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		ret += machine__fprintf_dsos_buildid(pos, fp, with_hits);
 	}
@@ -2139,59 +2148,59 @@ size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_
 
 struct dso *dso__new_kernel(const char *name)
 {
-	struct dso *self = dso__new(name ?: "[kernel.kallsyms]");
+	struct dso *dso = dso__new(name ?: "[kernel.kallsyms]");
 
-	if (self != NULL) {
-		dso__set_short_name(self, "[kernel]");
-		self->kernel = DSO_TYPE_KERNEL;
+	if (dso != NULL) {
+		dso__set_short_name(dso, "[kernel]");
+		dso->kernel = DSO_TYPE_KERNEL;
 	}
 
-	return self;
+	return dso;
 }
 
 static struct dso *dso__new_guest_kernel(struct machine *machine,
 					const char *name)
 {
 	char bf[PATH_MAX];
-	struct dso *self = dso__new(name ?: machine__mmap_name(machine, bf, sizeof(bf)));
-
-	if (self != NULL) {
-		dso__set_short_name(self, "[guest.kernel]");
-		self->kernel = DSO_TYPE_GUEST_KERNEL;
+	struct dso *dso = dso__new(name ?: machine__mmap_name(machine, bf,
+							      sizeof(bf)));
+	if (dso != NULL) {
+		dso__set_short_name(dso, "[guest.kernel]");
+		dso->kernel = DSO_TYPE_GUEST_KERNEL;
 	}
 
-	return self;
+	return dso;
 }
 
-void dso__read_running_kernel_build_id(struct dso *self, struct machine *machine)
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
 {
 	char path[PATH_MAX];
 
 	if (machine__is_default_guest(machine))
 		return;
 	sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
-	if (sysfs__read_build_id(path, self->build_id,
-				 sizeof(self->build_id)) == 0)
-		self->has_build_id = true;
+	if (sysfs__read_build_id(path, dso->build_id,
+				 sizeof(dso->build_id)) == 0)
+		dso->has_build_id = true;
 }
 
-static struct dso *machine__create_kernel(struct machine *self)
+static struct dso *machine__create_kernel(struct machine *machine)
 {
 	const char *vmlinux_name = NULL;
 	struct dso *kernel;
 
-	if (machine__is_host(self)) {
+	if (machine__is_host(machine)) {
 		vmlinux_name = symbol_conf.vmlinux_name;
 		kernel = dso__new_kernel(vmlinux_name);
 	} else {
-		if (machine__is_default_guest(self))
+		if (machine__is_default_guest(machine))
 			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
-		kernel = dso__new_guest_kernel(self, vmlinux_name);
+		kernel = dso__new_guest_kernel(machine, vmlinux_name);
 	}
 
 	if (kernel != NULL) {
-		dso__read_running_kernel_build_id(kernel, self);
-		dsos__add(&self->kernel_dsos, kernel);
+		dso__read_running_kernel_build_id(kernel, machine);
+		dsos__add(&machine->kernel_dsos, kernel);
 	}
 	return kernel;
 }
@@ -2236,41 +2245,43 @@ static u64 machine__get_kernel_start_addr(struct machine *machine)
 	return args.start;
 }
 
-int __machine__create_kernel_maps(struct machine *self, struct dso *kernel)
+int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 {
 	enum map_type type;
-	u64 start = machine__get_kernel_start_addr(self);
+	u64 start = machine__get_kernel_start_addr(machine);
 
 	for (type = 0; type < MAP__NR_TYPES; ++type) {
 		struct kmap *kmap;
 
-		self->vmlinux_maps[type] = map__new2(start, kernel, type);
-		if (self->vmlinux_maps[type] == NULL)
+		machine->vmlinux_maps[type] = map__new2(start, kernel, type);
+		if (machine->vmlinux_maps[type] == NULL)
 			return -1;
 
-		self->vmlinux_maps[type]->map_ip =
-			self->vmlinux_maps[type]->unmap_ip = identity__map_ip;
-
-		kmap = map__kmap(self->vmlinux_maps[type]);
-		kmap->kmaps = &self->kmaps;
-		map_groups__insert(&self->kmaps, self->vmlinux_maps[type]);
+		machine->vmlinux_maps[type]->map_ip =
+			machine->vmlinux_maps[type]->unmap_ip =
+				identity__map_ip;
+		kmap = map__kmap(machine->vmlinux_maps[type]);
+		kmap->kmaps = &machine->kmaps;
+		map_groups__insert(&machine->kmaps,
+				   machine->vmlinux_maps[type]);
 	}
 
 	return 0;
 }
 
-void machine__destroy_kernel_maps(struct machine *self)
+void machine__destroy_kernel_maps(struct machine *machine)
 {
 	enum map_type type;
 
 	for (type = 0; type < MAP__NR_TYPES; ++type) {
 		struct kmap *kmap;
 
-		if (self->vmlinux_maps[type] == NULL)
+		if (machine->vmlinux_maps[type] == NULL)
 			continue;
 
-		kmap = map__kmap(self->vmlinux_maps[type]);
-		map_groups__remove(&self->kmaps, self->vmlinux_maps[type]);
+		kmap = map__kmap(machine->vmlinux_maps[type]);
+		map_groups__remove(&machine->kmaps,
+				   machine->vmlinux_maps[type]);
 		if (kmap->ref_reloc_sym) {
 			/*
 			 * ref_reloc_sym is shared among all maps, so free just
@@ -2284,25 +2295,25 @@ void machine__destroy_kernel_maps(struct machine *self)
 			kmap->ref_reloc_sym = NULL;
 		}
 
-		map__delete(self->vmlinux_maps[type]);
-		self->vmlinux_maps[type] = NULL;
+		map__delete(machine->vmlinux_maps[type]);
+		machine->vmlinux_maps[type] = NULL;
 	}
 }
 
-int machine__create_kernel_maps(struct machine *self)
+int machine__create_kernel_maps(struct machine *machine)
 {
-	struct dso *kernel = machine__create_kernel(self);
+	struct dso *kernel = machine__create_kernel(machine);
 
 	if (kernel == NULL ||
-	    __machine__create_kernel_maps(self, kernel) < 0)
+	    __machine__create_kernel_maps(machine, kernel) < 0)
 		return -1;
 
-	if (symbol_conf.use_modules && machine__create_modules(self) < 0)
+	if (symbol_conf.use_modules && machine__create_modules(machine) < 0)
 		pr_debug("Problems creating module maps, continuing anyway...\n");
 	/*
 	 * Now that we have all the maps created, just set the ->end of them:
 	 */
-	map_groups__fixup_end(&self->kmaps);
+	map_groups__fixup_end(&machine->kmaps);
 	return 0;
 }
 
@@ -2366,11 +2377,11 @@ out_fail:
 	return -1;
 }
 
-size_t machine__fprintf_vmlinux_path(struct machine *self, FILE *fp)
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
 {
 	int i;
 	size_t printed = 0;
-	struct dso *kdso = self->vmlinux_maps[MAP__FUNCTION]->dso;
+	struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
 
 	if (kdso->has_build_id) {
 		char filename[PATH_MAX];
@@ -2467,9 +2478,9 @@ void symbol__exit(void)
 	symbol_conf.initialized = false;
 }
 
-int machines__create_kernel_maps(struct rb_root *self, pid_t pid)
+int machines__create_kernel_maps(struct rb_root *machines, pid_t pid)
 {
-	struct machine *machine = machines__findnew(self, pid);
+	struct machine *machine = machines__findnew(machines, pid);
 
 	if (machine == NULL)
 		return -1;
@@ -2520,7 +2531,7 @@ char *strxfrchar(char *s, char from, char to)
 	return s;
 }
 
-int machines__create_guest_kernel_maps(struct rb_root *self)
+int machines__create_guest_kernel_maps(struct rb_root *machines)
 {
 	int ret = 0;
 	struct dirent **namelist = NULL;
@@ -2531,7 +2542,7 @@ int machines__create_guest_kernel_maps(struct rb_root *self)
 	if (symbol_conf.default_guest_vmlinux_name ||
 	    symbol_conf.default_guest_modules ||
 	    symbol_conf.default_guest_kallsyms) {
-		machines__create_kernel_maps(self, DEFAULT_GUEST_KERNEL_ID);
+		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
 	}
 
 	if (symbol_conf.guestmount) {
@@ -2552,7 +2563,7 @@ int machines__create_guest_kernel_maps(struct rb_root *self)
 				pr_debug("Can't access file %s\n", path);
 				goto failure;
 			}
-			machines__create_kernel_maps(self, pid);
+			machines__create_kernel_maps(machines, pid);
 		}
 failure:
 		free(namelist);
@@ -2561,23 +2572,23 @@ failure:
 	return ret;
 }
 
-void machines__destroy_guest_kernel_maps(struct rb_root *self)
+void machines__destroy_guest_kernel_maps(struct rb_root *machines)
 {
-	struct rb_node *next = rb_first(self);
+	struct rb_node *next = rb_first(machines);
 
 	while (next) {
 		struct machine *pos = rb_entry(next, struct machine, rb_node);
 
 		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, self);
+		rb_erase(&pos->rb_node, machines);
 		machine__delete(pos);
 	}
 }
 
-int machine__load_kallsyms(struct machine *self, const char *filename,
+int machine__load_kallsyms(struct machine *machine, const char *filename,
 			   enum map_type type, symbol_filter_t filter)
 {
-	struct map *map = self->vmlinux_maps[type];
+	struct map *map = machine->vmlinux_maps[type];
 	int ret = dso__load_kallsyms(map->dso, filename, map, filter);
 
 	if (ret > 0) {
@@ -2587,16 +2598,16 @@ int machine__load_kallsyms(struct machine *self, const char *filename,
 		 * kernel, with modules between them, fixup the end of all
 		 * sections.
 		 */
-		__map_groups__fixup_end(&self->kmaps, type);
+		__map_groups__fixup_end(&machine->kmaps, type);
 	}
 
 	return ret;
 }
 
-int machine__load_vmlinux_path(struct machine *self, enum map_type type,
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 			       symbol_filter_t filter)
 {
-	struct map *map = self->vmlinux_maps[type];
+	struct map *map = machine->vmlinux_maps[type];
 	int ret = dso__load_vmlinux_path(map->dso, map, filter);
 
 	if (ret > 0) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 713b0b40cc4a..242de0101a86 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -62,7 +62,7 @@ struct symbol {
 	char		name[0];
 };
 
-void symbol__delete(struct symbol *self);
+void symbol__delete(struct symbol *sym);
 
 struct strlist;
 
@@ -96,9 +96,9 @@ struct symbol_conf {
 
 extern struct symbol_conf symbol_conf;
 
-static inline void *symbol__priv(struct symbol *self)
+static inline void *symbol__priv(struct symbol *sym)
 {
-	return ((void *)self) - symbol_conf.priv_size;
+	return ((void *)sym) - symbol_conf.priv_size;
 }
 
 struct ref_reloc_sym {
@@ -155,43 +155,45 @@ struct dso {
 
 struct dso *dso__new(const char *name);
 struct dso *dso__new_kernel(const char *name);
-void dso__delete(struct dso *self);
+void dso__delete(struct dso *dso);
 
-int dso__name_len(const struct dso *self);
+int dso__name_len(const struct dso *dso);
 
-bool dso__loaded(const struct dso *self, enum map_type type);
-bool dso__sorted_by_name(const struct dso *self, enum map_type type);
+bool dso__loaded(const struct dso *dso, enum map_type type);
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
 
-static inline void dso__set_loaded(struct dso *self, enum map_type type)
+static inline void dso__set_loaded(struct dso *dso, enum map_type type)
 {
-	self->loaded |= (1 << type);
+	dso->loaded |= (1 << type);
 }
 
-void dso__sort_by_name(struct dso *self, enum map_type type);
+void dso__sort_by_name(struct dso *dso, enum map_type type);
 
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
-int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
-int dso__load_vmlinux(struct dso *self, struct map *map,
+int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter);
+int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, symbol_filter_t filter);
-int dso__load_vmlinux_path(struct dso *self, struct map *map,
+int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 			   symbol_filter_t filter);
-int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
 		       symbol_filter_t filter);
-int machine__load_kallsyms(struct machine *self, const char *filename,
+int machine__load_kallsyms(struct machine *machine, const char *filename,
 			   enum map_type type, symbol_filter_t filter);
-int machine__load_vmlinux_path(struct machine *self, enum map_type type,
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 			       symbol_filter_t filter);
 
 size_t __dsos__fprintf(struct list_head *head, FILE *fp);
 
-size_t machine__fprintf_dsos_buildid(struct machine *self, FILE *fp, bool with_hits);
-size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp);
-size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits);
-
-size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
-size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp);
-size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
+size_t machine__fprintf_dsos_buildid(struct machine *machine,
+				     FILE *fp, bool with_hits);
+size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
+				      FILE *fp, bool with_hits);
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp);
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
 
 enum symtab_type {
 	SYMTAB__KALLSYMS = 0,
@@ -207,34 +209,36 @@ enum symtab_type {
 	SYMTAB__NOT_FOUND,
 };
 
-char dso__symtab_origin(const struct dso *self);
-void dso__set_long_name(struct dso *self, char *name);
-void dso__set_build_id(struct dso *self, void *build_id);
-void dso__read_running_kernel_build_id(struct dso *self, struct machine *machine);
-struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
-struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
+char dso__symtab_origin(const struct dso *dso);
+void dso__set_long_name(struct dso *dso, char *name);
+void dso__set_build_id(struct dso *dso, void *build_id);
+void dso__read_running_kernel_build_id(struct dso *dso,
+				       struct machine *machine);
+struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
+				u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name);
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
-int build_id__sprintf(const u8 *self, int len, char *bf);
+int build_id__sprintf(const u8 *build_id, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start, u64 end));
 
-void machine__destroy_kernel_maps(struct machine *self);
-int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
-int machine__create_kernel_maps(struct machine *self);
+void machine__destroy_kernel_maps(struct machine *machine);
+int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
+int machine__create_kernel_maps(struct machine *machine);
 
-int machines__create_kernel_maps(struct rb_root *self, pid_t pid);
-int machines__create_guest_kernel_maps(struct rb_root *self);
-void machines__destroy_guest_kernel_maps(struct rb_root *self);
+int machines__create_kernel_maps(struct rb_root *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct rb_root *machines);
+void machines__destroy_guest_kernel_maps(struct rb_root *machines);
 
 int symbol__init(void);
 void symbol__exit(void);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
-size_t machine__fprintf_vmlinux_path(struct machine *self, FILE *fp);
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0a7ed5b5e281..1e88485c16a0 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2187,7 +2187,6 @@ static const struct flag flags[] = {
 	{ "TASKLET_SOFTIRQ", 6 },
 	{ "SCHED_SOFTIRQ", 7 },
 	{ "HRTIMER_SOFTIRQ", 8 },
-	{ "RCU_SOFTIRQ", 9 },
 
 	{ "HRTIMER_NORESTART", 0 },
 	{ "HRTIMER_RESTART", 1 },
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 8c17a8730e4a..15633d608133 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -256,10 +256,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 			 int refresh)
 {
 	struct objdump_line *pos, *n;
-	struct annotation *notes = symbol__annotation(sym);
+	struct annotation *notes;
 	struct annotate_browser browser = {
 		.b = {
-			.entries = &notes->src->source,
 			.refresh = ui_browser__list_head_refresh,
 			.seek	 = ui_browser__list_head_seek,
 			.write	 = annotate_browser__write,
@@ -281,6 +280,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 
 	ui_helpline__push("Press <- or ESC to exit");
 
+	notes = symbol__annotation(sym);
+
 	list_for_each_entry(pos, &notes->src->source, node) {
 		struct objdump_line_rb_node *rbpos;
 		size_t line_len = strlen(pos->line);
@@ -291,6 +292,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 		rbpos->idx = browser.b.nr_entries++;
 	}
 
+	browser.b.entries = &notes->src->source;
 	browser.b.width += 18; /* Percentage */
 	ret = annotate_browser__run(&browser, evidx, refresh);
 	list_for_each_entry_safe(pos, n, &notes->src->source, node) {
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 798efdca3ead..5d767c622dfc 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -851,7 +851,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel,
 			goto out_free_stack;
 		case 'a':
 			if (browser->selection == NULL ||
-			    browser->selection->map == NULL ||
+			    browser->selection->sym == NULL ||
 			    browser->selection->map->dso->annotate_warned)
 				continue;
 			goto do_annotate;