diff options
author | Stefan Agner <stefan.agner@toradex.com> | 2015-10-23 15:25:54 -0700 |
---|---|---|
committer | Stefan Agner <stefan.agner@toradex.com> | 2015-10-23 15:25:54 -0700 |
commit | 0df341edfa5e7c119523a0c30146e88106f88b43 (patch) | |
tree | b8a8bfabaa488d70ce49a7941e974bab023cb2d0 /arch | |
parent | 9ac0b253c59216c9614436b2006d0557396eb268 (diff) | |
parent | 205a8514e63a221c3a5071f27521013444e43e5f (diff) |
Merge tag 'v4.1.11' into toradex_vf_4.1-next
This is the 4.1.11 stable release
Diffstat (limited to 'arch')
179 files changed, 1422 insertions, 963 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..19f4cc634b0e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -538,6 +538,7 @@ config ARCH_ORION5X select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY + select MULTI_IRQ_HANDLER help Support for the following Marvell Orion 5x series SoCs: Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 985227cbbd1b..47f10e7ad1f6 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -50,6 +50,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index bd245d34952d..a0765e7ed6c7 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -57,5 +57,5 @@ extern char * strstr(const char * s1, const char *s2); int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { - return decompress(input, len, NULL, NULL, output, NULL, error); + return __decompress(input, len, NULL, NULL, output, 0, NULL, error); } diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index f03a091cd076..dfcc0dd637e5 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -116,7 +116,7 @@ ranges = <0 0x2000 0x2000>; scm_conf: scm_conf@0 { - compatible = "syscon"; + compatible = "syscon", "simple-bus"; reg = <0x0 0x1400>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index 0b9906880c0c..75aba40c69e1 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -181,7 +181,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <4600000>; hactive = <320>; vactive = <320>; hfront-porch = <1>; diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index dd45e6971bc3..9351296356dc 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -10,6 +10,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> #include "imx25.dtsi" @@ -114,8 +115,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 1 0>; - wp-gpios = <&gpio2 0 0>; + cd-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7..e6540b5cfa4c 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx51-apf51dev.dts b/arch/arm/boot/dts/imx51-apf51dev.dts index 93d3ea12328c..0f3fe29b816e 100644 --- a/arch/arm/boot/dts/imx51-apf51dev.dts +++ b/arch/arm/boot/dts/imx51-apf51dev.dts @@ -98,7 +98,7 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 29 GPIO_ACTIVE_LOW>; bus-width = <4>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts index e9337ad52f59..3bc18835fb4b 100644 --- a/arch/arm/boot/dts/imx53-ard.dts +++ b/arch/arm/boot/dts/imx53-ard.dts @@ -103,8 +103,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts index d0e0f57eb432..53f40885c530 100644 --- a/arch/arm/boot/dts/imx53-m53evk.dts +++ b/arch/arm/boot/dts/imx53-m53evk.dts @@ -124,8 +124,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 181ae5ebf23f..1f55187ed9ce 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -147,8 +147,8 @@ &esdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc3>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; + cd-gpios = <&gpio3 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>; bus-width = <8>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts index 1d325576bcc0..fc89ce1e5763 100644 --- a/arch/arm/boot/dts/imx53-smd.dts +++ b/arch/arm/boot/dts/imx53-smd.dts @@ -41,8 +41,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio4 11 0>; + cd-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 11 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi index 4f1f0e2868bf..e03373a58760 100644 --- a/arch/arm/boot/dts/imx53-tqma53.dtsi +++ b/arch/arm/boot/dts/imx53-tqma53.dtsi @@ -41,8 +41,8 @@ pinctrl-0 = <&pinctrl_esdhc2>, <&pinctrl_esdhc2_cdwp>; vmmc-supply = <®_3p3v>; - wp-gpios = <&gpio1 2 0>; - cd-gpios = <&gpio1 4 0>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi index 704bd72cbfec..d3e50b22064f 100644 --- a/arch/arm/boot/dts/imx53-tx53.dtsi +++ b/arch/arm/boot/dts/imx53-tx53.dtsi @@ -183,7 +183,7 @@ }; &esdhc1 { - cd-gpios = <&gpio3 24 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; @@ -191,7 +191,7 @@ }; &esdhc2 { - cd-gpios = <&gpio3 25 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; diff --git a/arch/arm/boot/dts/imx53-voipac-bsb.dts b/arch/arm/boot/dts/imx53-voipac-bsb.dts index c17d3ad6dba5..fc51b87ad208 100644 --- a/arch/arm/boot/dts/imx53-voipac-bsb.dts +++ b/arch/arm/boot/dts/imx53-voipac-bsb.dts @@ -119,8 +119,8 @@ &esdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; - cd-gpios = <&gpio3 25 0>; - wp-gpios = <&gpio2 19 0>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index 488a640796ac..394a4ace351a 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -35,7 +35,6 @@ compatible = "regulator-fixed"; reg = <1>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbh1>; regulator-name = "usbh1_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -47,7 +46,6 @@ compatible = "regulator-fixed"; reg = <2>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg>; regulator-name = "usb_otg_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index f74a8ded515f..38c786018a09 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -153,10 +153,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_PCIE_AXI>, <&clks IMX6QDL_CLK_LVDS1_GATE>, <&clks IMX6QDL_CLK_PCIE_REF_125M>; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0..d56d68fe7ffc 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c0067..af9b7190533a 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f073..ef8464bb11ff 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index 11a7963be003..2390f387c271 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -51,7 +51,8 @@ }; scm_conf: scm_conf@270 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x270 0x240>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index a5474113cd50..67659a0ed13e 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -202,7 +202,7 @@ tfp410_pins: pinmux_tfp410_pins { pinctrl-single,pins = < - 0x194 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ + 0x196 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ >; }; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e..84be9da74c7e 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -191,7 +191,8 @@ }; omap4_padconf_global: omap4_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0x170>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 74777a6e200a..1b958e92d674 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -174,8 +174,8 @@ i2c5_pins: pinmux_i2c5_pins { pinctrl-single,pins = < - 0x184 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ - 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ + 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ + 0x188 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ >; }; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b5..874a26f9dc0f 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -180,7 +180,8 @@ }; omap5_padconf_global: omap5_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0xec>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 165968d51d8f..8eca5878a877 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -584,7 +584,7 @@ compatible = "rockchip,rk3288-wdt", "snps,dw-wdt"; reg = <0xff800000 0x100>; clocks = <&cru PCLK_WDT>; - interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a6ad93c9bce3..fd9eefce0a7b 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -259,15 +259,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return err; - patch_text((void *)bpt->bpt_addr, - *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791..586eef26203d 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -343,12 +343,17 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 6 /* - * Clear the If-Then Thumb-2 execution state - * ARM spec requires this to be all 000s in ARM mode - * Snapdragon S4/Krait misbehaves on a Thumb=>ARM - * signal transition without this. + * Clear the If-Then Thumb-2 execution state. ARM spec + * requires this to be all 000s in ARM mode. Snapdragon + * S4/Krait misbehaves on a Thumb=>ARM signal transition + * without this. + * + * We must do this whenever we are running on a Thumb-2 + * capable CPU, which includes ARMv6T2. However, we elect + * to do this whenever we're on an ARMv6 or later CPU for + * simplicity. */ cpsr &= ~PSR_IT_MASK; #endif diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56..d6223cbcb661 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -450,7 +450,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { + if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { ret = kvm_vgic_map_resources(kvm); if (ret) return ret; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 48efe2ee452c..58048b333d31 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -518,8 +518,7 @@ ARM_BE8(rev r6, r6 ) mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - bic r2, #1 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL @@ -532,6 +531,9 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: + mov r2, #0 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf..191dcfab9f60 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1790,8 +1790,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index 4c38674c73ec..54d274da7ccb 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -43,5 +43,5 @@ obj-$(CONFIG_ARCH_BCM_63XX) := bcm63xx.o ifeq ($(CONFIG_ARCH_BRCMSTB),y) CFLAGS_platsmp-brcmstb.o += -march=armv7-a obj-y += brcmstb.o -obj-$(CONFIG_SMP) += headsmp-brcmstb.o platsmp-brcmstb.o +obj-$(CONFIG_SMP) += platsmp-brcmstb.o endif diff --git a/arch/arm/mach-bcm/brcmstb.h b/arch/arm/mach-bcm/brcmstb.h deleted file mode 100644 index ec0c3d112b36..000000000000 --- a/arch/arm/mach-bcm/brcmstb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __BRCMSTB_H__ -#define __BRCMSTB_H__ - -void brcmstb_secondary_startup(void); - -#endif /* __BRCMSTB_H__ */ diff --git a/arch/arm/mach-bcm/headsmp-brcmstb.S b/arch/arm/mach-bcm/headsmp-brcmstb.S deleted file mode 100644 index 199c1ea58248..000000000000 --- a/arch/arm/mach-bcm/headsmp-brcmstb.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SMP boot code for secondary CPUs - * Based on arch/arm/mach-tegra/headsmp.S - * - * Copyright (C) 2010 NVIDIA, Inc. - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include <asm/assembler.h> -#include <linux/linkage.h> -#include <linux/init.h> - - .section ".text.head", "ax" - -ENTRY(brcmstb_secondary_startup) - /* - * Ensure CPU is in a sane state by disabling all IRQs and switching - * into SVC mode. - */ - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r0 - - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(brcmstb_secondary_startup) diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index e209e6fc7caf..44d6bddf7a4e 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -30,8 +30,6 @@ #include <asm/mach-types.h> #include <asm/smp_plat.h> -#include "brcmstb.h" - enum { ZONE_MAN_CLKEN_MASK = BIT(0), ZONE_MAN_RESET_CNTL_MASK = BIT(1), @@ -153,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(brcmstb_secondary_startup)); + cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S index 4a4c56a58ad3..dc82a3486b05 100644 --- a/arch/arm/mach-berlin/headsmp.S +++ b/arch/arm/mach-berlin/headsmp.S @@ -12,12 +12,6 @@ #include <linux/init.h> #include <asm/assembler.h> -ENTRY(berlin_secondary_startup) - ARM_BE8(setend be) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(berlin_secondary_startup) - /* * If the following instruction is set in the reset exception vector, CPUs * will fetch the value of the software reset address vector when being diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 702e7982015a..34a3753e7356 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -22,7 +22,6 @@ #define RESET_VECT 0x00 #define SW_RESET_ADDR 0x94 -extern void berlin_secondary_startup(void); extern u32 boot_inst; static void __iomem *cpu_ctrl; @@ -85,7 +84,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. */ - writel(virt_to_phys(berlin_secondary_startup), vectors_base + SW_RESET_ADDR); + writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9bdf54795f05..56978199c479 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -20,6 +20,7 @@ #include <asm/cputype.h> #include <asm/cp15.h> #include <asm/mcpm.h> +#include <asm/smp_plat.h> #include "regs-pmu.h" #include "common.h" @@ -70,7 +71,31 @@ static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - exynos_cpu_power_up(cpunr); + if (!exynos_cpu_power_state(cpunr)) { + exynos_cpu_power_up(cpunr); + + /* + * This assumes the cluster number of the big cores(Cortex A15) + * is 0 and the Little cores(Cortex A7) is 1. + * When the system was booted from the Little core, + * they should be reset during power up cpu. + */ + if (cluster && + cluster == MPIDR_AFFINITY_LEVEL(cpu_logical_map(0), 1)) { + /* + * Before we reset the Little cores, we should wait + * the SPARE2 register is set to 1 because the init + * codes of the iROM will set the register after + * initialization. + */ + while (!pmu_raw_readl(S5P_PMU_SPARE2)) + udelay(10); + + pmu_raw_writel(EXYNOS5420_KFC_CORE_RESET(cpu), + EXYNOS_SWRESET); + } + } + return 0; } diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h index b7614333d296..fba9068ed260 100644 --- a/arch/arm/mach-exynos/regs-pmu.h +++ b/arch/arm/mach-exynos/regs-pmu.h @@ -513,6 +513,12 @@ static inline unsigned int exynos_pmu_cpunr(unsigned int mpidr) #define SPREAD_ENABLE 0xF #define SPREAD_USE_STANDWFI 0xF +#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) +#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) + +#define EXYNOS5420_KFC_CORE_RESET(_nr) \ + ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) + #define EXYNOS5420_BB_CON1 0x0784 #define EXYNOS5420_BB_SEL_EN BIT(31) #define EXYNOS5420_BB_PMOS_EN BIT(7) diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile index 6b7b3033de0b..659db1933ed3 100644 --- a/arch/arm/mach-hisi/Makefile +++ b/arch/arm/mach-hisi/Makefile @@ -6,4 +6,4 @@ CFLAGS_platmcpm.o := -march=armv7-a obj-y += hisilicon.o obj-$(CONFIG_MCPM) += platmcpm.o -obj-$(CONFIG_SMP) += platsmp.o hotplug.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o hotplug.o diff --git a/arch/arm/mach-hisi/core.h b/arch/arm/mach-hisi/core.h index 92a682d8e939..c7648ef1825c 100644 --- a/arch/arm/mach-hisi/core.h +++ b/arch/arm/mach-hisi/core.h @@ -12,7 +12,6 @@ extern void hi3xxx_cpu_die(unsigned int cpu); extern int hi3xxx_cpu_kill(unsigned int cpu); extern void hi3xxx_set_cpu(int cpu, bool enable); -extern void hisi_secondary_startup(void); extern struct smp_operations hix5hd2_smp_ops; extern void hix5hd2_set_cpu(int cpu, bool enable); extern void hix5hd2_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-hisi/headsmp.S b/arch/arm/mach-hisi/headsmp.S deleted file mode 100644 index 81e35b159e75..000000000000 --- a/arch/arm/mach-hisi/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2014 Hisilicon Limited. - * Copyright (c) 2014 Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> - - __CPUINIT - -ENTRY(hisi_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index 8880c8e8b296..51744127db66 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index de5047c8a6c8..b5e976816b63 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -25,7 +25,6 @@ diag_reg_offset: .endm ENTRY(v7_secondary_startup) - bl v7_invalidate_l1 set_diag_reg b secondary_startup ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 08d5ed46b996..48e4c4b3cd1c 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -21,7 +21,6 @@ ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) - bl v7_invalidate_l1 bl armada_38x_scu_power_up b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c index 57d5df0c1fbd..7581e036bda6 100644 --- a/arch/arm/mach-omap2/clockdomains7xx_data.c +++ b/arch/arm/mach-omap2/clockdomains7xx_data.c @@ -331,7 +331,7 @@ static struct clockdomain l4per2_7xx_clkdm = { .dep_bit = DRA7XX_L4PER2_STATDEP_SHIFT, .wkdep_srcs = l4per2_wkup_sleep_deps, .sleepdep_srcs = l4per2_wkup_sleep_deps, - .flags = CLKDM_CAN_HWSUP_SWSUP, + .flags = CLKDM_CAN_SWSUP, }; static struct clockdomain mpu0_7xx_clkdm = { diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 3b56722dfd8a..6833df45d7b1 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 752969ff9de0..5286e7773ed4 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. + * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); diff --git a/arch/arm/mach-orion5x/include/mach/irqs.h b/arch/arm/mach-orion5x/include/mach/irqs.h index a6fa9d8f12d8..2431d9923427 100644 --- a/arch/arm/mach-orion5x/include/mach/irqs.h +++ b/arch/arm/mach-orion5x/include/mach/irqs.h @@ -16,42 +16,42 @@ /* * Orion Main Interrupt Controller */ -#define IRQ_ORION5X_BRIDGE 0 -#define IRQ_ORION5X_DOORBELL_H2C 1 -#define IRQ_ORION5X_DOORBELL_C2H 2 -#define IRQ_ORION5X_UART0 3 -#define IRQ_ORION5X_UART1 4 -#define IRQ_ORION5X_I2C 5 -#define IRQ_ORION5X_GPIO_0_7 6 -#define IRQ_ORION5X_GPIO_8_15 7 -#define IRQ_ORION5X_GPIO_16_23 8 -#define IRQ_ORION5X_GPIO_24_31 9 -#define IRQ_ORION5X_PCIE0_ERR 10 -#define IRQ_ORION5X_PCIE0_INT 11 -#define IRQ_ORION5X_USB1_CTRL 12 -#define IRQ_ORION5X_DEV_BUS_ERR 14 -#define IRQ_ORION5X_PCI_ERR 15 -#define IRQ_ORION5X_USB_BR_ERR 16 -#define IRQ_ORION5X_USB0_CTRL 17 -#define IRQ_ORION5X_ETH_RX 18 -#define IRQ_ORION5X_ETH_TX 19 -#define IRQ_ORION5X_ETH_MISC 20 -#define IRQ_ORION5X_ETH_SUM 21 -#define IRQ_ORION5X_ETH_ERR 22 -#define IRQ_ORION5X_IDMA_ERR 23 -#define IRQ_ORION5X_IDMA_0 24 -#define IRQ_ORION5X_IDMA_1 25 -#define IRQ_ORION5X_IDMA_2 26 -#define IRQ_ORION5X_IDMA_3 27 -#define IRQ_ORION5X_CESA 28 -#define IRQ_ORION5X_SATA 29 -#define IRQ_ORION5X_XOR0 30 -#define IRQ_ORION5X_XOR1 31 +#define IRQ_ORION5X_BRIDGE (1 + 0) +#define IRQ_ORION5X_DOORBELL_H2C (1 + 1) +#define IRQ_ORION5X_DOORBELL_C2H (1 + 2) +#define IRQ_ORION5X_UART0 (1 + 3) +#define IRQ_ORION5X_UART1 (1 + 4) +#define IRQ_ORION5X_I2C (1 + 5) +#define IRQ_ORION5X_GPIO_0_7 (1 + 6) +#define IRQ_ORION5X_GPIO_8_15 (1 + 7) +#define IRQ_ORION5X_GPIO_16_23 (1 + 8) +#define IRQ_ORION5X_GPIO_24_31 (1 + 9) +#define IRQ_ORION5X_PCIE0_ERR (1 + 10) +#define IRQ_ORION5X_PCIE0_INT (1 + 11) +#define IRQ_ORION5X_USB1_CTRL (1 + 12) +#define IRQ_ORION5X_DEV_BUS_ERR (1 + 14) +#define IRQ_ORION5X_PCI_ERR (1 + 15) +#define IRQ_ORION5X_USB_BR_ERR (1 + 16) +#define IRQ_ORION5X_USB0_CTRL (1 + 17) +#define IRQ_ORION5X_ETH_RX (1 + 18) +#define IRQ_ORION5X_ETH_TX (1 + 19) +#define IRQ_ORION5X_ETH_MISC (1 + 20) +#define IRQ_ORION5X_ETH_SUM (1 + 21) +#define IRQ_ORION5X_ETH_ERR (1 + 22) +#define IRQ_ORION5X_IDMA_ERR (1 + 23) +#define IRQ_ORION5X_IDMA_0 (1 + 24) +#define IRQ_ORION5X_IDMA_1 (1 + 25) +#define IRQ_ORION5X_IDMA_2 (1 + 26) +#define IRQ_ORION5X_IDMA_3 (1 + 27) +#define IRQ_ORION5X_CESA (1 + 28) +#define IRQ_ORION5X_SATA (1 + 29) +#define IRQ_ORION5X_XOR0 (1 + 30) +#define IRQ_ORION5X_XOR1 (1 + 31) /* * Orion General Purpose Pins */ -#define IRQ_ORION5X_GPIO_START 32 +#define IRQ_ORION5X_GPIO_START 33 #define NR_GPIO_IRQS 32 #define NR_IRQS (IRQ_ORION5X_GPIO_START + NR_GPIO_IRQS) diff --git a/arch/arm/mach-orion5x/irq.c b/arch/arm/mach-orion5x/irq.c index cd4bac4d7e43..086ecb87d885 100644 --- a/arch/arm/mach-orion5x/irq.c +++ b/arch/arm/mach-orion5x/irq.c @@ -42,7 +42,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(MAIN_IRQ_CAUSE); stat &= readl_relaxed(MAIN_IRQ_MASK); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -51,7 +51,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) void __init orion5x_init_irq(void) { - orion_irq_init(0, MAIN_IRQ_MASK); + orion_irq_init(1, MAIN_IRQ_MASK); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(orion5x_legacy_handle_irq); diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S index d86fe33c5f53..209d9fc5c16c 100644 --- a/arch/arm/mach-prima2/headsmp.S +++ b/arch/arm/mach-prima2/headsmp.S @@ -15,7 +15,6 @@ * ready for them to initialise. */ ENTRY(sirfsoc_secondary_startup) - bl v7_invalidate_l1 mrc p15, 0, r0, c0, c0, 5 and r0, r0, #15 adr r4, 1f diff --git a/arch/arm/mach-rockchip/core.h b/arch/arm/mach-rockchip/core.h index 39bca96b555a..492c048813da 100644 --- a/arch/arm/mach-rockchip/core.h +++ b/arch/arm/mach-rockchip/core.h @@ -17,4 +17,3 @@ extern char rockchip_secondary_trampoline; extern char rockchip_secondary_trampoline_end; extern unsigned long rockchip_boot_fn; -extern void rockchip_secondary_startup(void); diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S index 46c22dedf632..d69708b07282 100644 --- a/arch/arm/mach-rockchip/headsmp.S +++ b/arch/arm/mach-rockchip/headsmp.S @@ -15,14 +15,6 @@ #include <linux/linkage.h> #include <linux/init.h> -ENTRY(rockchip_secondary_startup) - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - ldr r1, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r1 - beq v7_invalidate_l1 - b secondary_startup -ENDPROC(rockchip_secondary_startup) - ENTRY(rockchip_secondary_trampoline) ldr pc, 1f ENDPROC(rockchip_secondary_trampoline) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 5b4ca3c3c879..611a5f96d3ca 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -72,29 +72,22 @@ static struct reset_control *rockchip_get_core_reset(int cpu) static int pmu_set_power_domain(int pd, bool on) { u32 val = (on) ? 0 : BIT(pd); + struct reset_control *rstc = rockchip_get_core_reset(pd); int ret; + if (IS_ERR(rstc) && read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { + pr_err("%s: could not get reset control for core %d\n", + __func__, pd); + return PTR_ERR(rstc); + } + /* * We need to soft reset the cpu when we turn off the cpu power domain, * or else the active processors might be stalled when the individual * processor is powered down. */ - if (read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { - struct reset_control *rstc = rockchip_get_core_reset(pd); - - if (IS_ERR(rstc)) { - pr_err("%s: could not get reset control for core %d\n", - __func__, pd); - return PTR_ERR(rstc); - } - - if (on) - reset_control_deassert(rstc); - else - reset_control_assert(rstc); - - reset_control_put(rstc); - } + if (!IS_ERR(rstc) && !on) + reset_control_assert(rstc); ret = regmap_update_bits(pmu, PMU_PWRDN_CON, BIT(pd), val); if (ret < 0) { @@ -112,6 +105,12 @@ static int pmu_set_power_domain(int pd, bool on) } } + if (!IS_ERR(rstc)) { + if (on) + reset_control_deassert(rstc); + reset_control_put(rstc); + } + return 0; } @@ -147,10 +146,13 @@ static int __cpuinit rockchip_boot_secondary(unsigned int cpu, * the mailbox: * sram_base_addr + 4: 0xdeadbeaf * sram_base_addr + 8: start address for pc + * The cpu0 need to wait the other cpus other than cpu0 entering + * the wfe state.The wait time is affected by many aspects. + * (e.g: cpu frequency, bootrom frequency, sram frequency, ...) * */ - udelay(10); - writel(virt_to_phys(rockchip_secondary_startup), - sram_base_addr + 8); + mdelay(1); /* ensure the cpus other than cpu0 to startup */ + + writel(virt_to_phys(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } @@ -189,7 +191,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(rockchip_secondary_startup); + rockchip_boot_fn = virt_to_phys(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index afc60bad6fd6..476092b86c6e 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -14,7 +14,6 @@ extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); extern int shmobile_smp_cpu_disable(unsigned int cpu); -extern void shmobile_invalidate_start(void); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S index 69df8bfac167..fa5248c52399 100644 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ b/arch/arm/mach-shmobile/headsmp-scu.S @@ -22,7 +22,7 @@ * Boot code for secondary CPUs. * * First we turn on L1 cache coherency for our CPU. Then we jump to - * shmobile_invalidate_start that invalidates the cache and hands over control + * secondary_startup that invalidates the cache and hands over control * to the common ARM startup code. */ ENTRY(shmobile_boot_scu) @@ -36,7 +36,7 @@ ENTRY(shmobile_boot_scu) bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) str r2, [r0, #8] @ write back - b shmobile_invalidate_start + b secondary_startup ENDPROC(shmobile_boot_scu) .text diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 50c491567e11..330c1fc63197 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -16,13 +16,6 @@ #include <asm/assembler.h> #include <asm/memory.h> -#ifdef CONFIG_SMP -ENTRY(shmobile_invalidate_start) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(shmobile_invalidate_start) -#endif - /* * Reset vector for secondary CPUs. * This will be mapped at address 0 by SBAR register. diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index f483b560b066..b0790fc32282 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -133,7 +133,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_invalidate_start), 0); + shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index a0f3b1cd497c..767c09e954a0 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -31,7 +31,6 @@ #define RSTMGR_MPUMODRST_CPU1 0x2 /* CPU1 Reset */ -extern void socfpga_secondary_startup(void); extern void __iomem *socfpga_scu_base_addr; extern void socfpga_init_clocks(void); diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index f65ea0af4af3..5bb016427107 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -30,8 +30,3 @@ ENTRY(secondary_trampoline) 1: .long . .long socfpga_cpu1start_addr ENTRY(secondary_trampoline_end) - -ENTRY(socfpga_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(socfpga_secondary_startup) diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index c64d89b7c0ca..79c5336c569f 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(socfpga_secondary_startup), + writel(virt_to_phys(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index e48a74458c25..fffad2426ee4 100644 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += pm-tegra30.o ifeq ($(CONFIG_CPU_IDLE),y) obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuidle-tegra30.o endif -obj-$(CONFIG_SMP) += platsmp.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_ARCH_TEGRA_114_SOC) += sleep-tegra30.o diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S deleted file mode 100644 index 2072e7322c39..000000000000 --- a/arch/arm/mach-tegra/headsmp.S +++ /dev/null @@ -1,12 +0,0 @@ -#include <linux/linkage.h> -#include <linux/init.h> - -#include "sleep.h" - - .section ".text.head", "ax" - -ENTRY(tegra_secondary_startup) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - b secondary_startup -ENDPROC(tegra_secondary_startup) diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 894c5c472184..6fd9db54887e 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,7 +94,7 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)tegra_secondary_startup); + virt_to_phys((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 29c3dec0126a..9c479c7925b8 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -37,7 +37,6 @@ void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); -void tegra_secondary_startup(void); #ifdef CONFIG_PM_SLEEP #define tegra_cpu_lp1_mask \ diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index 382c60e9aa16..7038cae95ddc 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,8 +17,6 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ -void zynq_secondary_startup(void); - extern int zynq_slcr_init(void); extern int zynq_early_slcr_init(void); extern void zynq_slcr_system_reset(void); diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S index dd8c071941e7..045c72720a4d 100644 --- a/arch/arm/mach-zynq/headsmp.S +++ b/arch/arm/mach-zynq/headsmp.S @@ -22,8 +22,3 @@ zynq_secondary_trampoline_jump: .globl zynq_secondary_trampoline_end zynq_secondary_trampoline_end: ENDPROC(zynq_secondary_trampoline) - -ENTRY(zynq_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zynq_secondary_startup) diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index 52d768ff7857..f66816c49186 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -87,10 +87,9 @@ int zynq_cpun_start(u32 address, int cpu) } EXPORT_SYMBOL(zynq_cpun_start); -static int zynq_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(zynq_secondary_startup), cpu); + return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); } /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3d1054f11a8a..7911f14c2157 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -268,7 +268,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -277,7 +280,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -335,10 +338,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_louis - ldmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} +__v7_setup_cont: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? teq r10, #0x41000000 @@ -460,7 +464,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 12 registers __INITDATA diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa791051029..1160434eece0 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,15 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +46,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7796af4b1d6f..6f0a3b41b009 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,10 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y @@ -409,6 +413,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4d2a925998f9..81151663ef38 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,6 +30,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f800d45ea226..44a59c20e773 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -114,6 +114,14 @@ extern phys_addr_t memstart_addr; #define PHYS_OFFSET ({ memstart_addr; }) /* + * The maximum physical address that the linear direct mapping + * of system RAM can cover. (PAGE_OFFSET can be interpreted as + * a 2's complement signed quantity and negated to derive the + * maximum size of the linear mapping.) + */ +#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) + +/* * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. * diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 352962bc2e78..5170fd5c8e97 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -257,7 +257,8 @@ static bool __init efi_virtmap_init(void) */ if (!is_normal_ram(md)) prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) prot = PAGE_KERNEL_EXEC; else prot = PAGE_KERNEL; diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc518b9a..0f03a8fe2314 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3dca15634e69..c31e59fe2cb8 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -157,6 +157,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 19f915e8f6e0..36aa31ff2c06 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -565,6 +565,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6ef..876eb8df50bf 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cce18c85d2e8..7778453762d8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1318,7 +1318,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1355,6 +1355,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d26fcd4cd6e6..c58aee062590 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitely for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; @@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) @@ -213,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -236,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -264,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -274,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5befd010e232..64f9e60b31da 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -844,8 +844,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -853,6 +851,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 @@ -947,13 +948,15 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs bl __restore_fpsimd skip_debug_state x3, 1f bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f02530e726f6..85c57158dcd9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, false, addr); - - inject_abt64(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); } /** @@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, true, addr); - - inject_abt64(vcpu, true, addr); + else + inject_abt64(vcpu, true, addr); } /** @@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_undef32(vcpu); - - inject_undef64(vcpu); + else + inject_undef64(vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 96da13167d4a..fa5efaa5c3ac 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -279,6 +279,7 @@ retry: * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 28a09529f206..3a7692745868 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -86,6 +86,7 @@ decompress_kernel(int mmu_on, unsigned char *zimage_data, free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE; puts("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output_data, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output_data, 0, + NULL, error); puts("done.\nBooting the kernel.\n"); } diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f7..066e74f666ae 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 7fc8397d16f2..fd2a36a79f97 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -186,6 +186,7 @@ int get_c0_perfcount_int(void) { return ATH79_MISC_IRQ(5); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 54831069a206..080cd53bac36 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -111,8 +111,8 @@ void decompress_kernel(unsigned long boot_heap_start) puts("\n"); /* Decompress the kernel with according algorithm */ - decompress((char *)zimage_start, zimage_size, 0, 0, - (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, error); + __decompress((char *)zimage_start, zimage_size, 0, 0, + (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); /* FIXME: should we flush cache here? */ puts("Now, booting the kernel...\n"); diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h deleted file mode 100644 index 11d3b572b1b3..000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H -#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H - -#include <asm/bmips.h> - -#define plat_post_dma_flush bmips_post_dma_flush - -#include <asm/mach-generic/dma-coherence.h> - -#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 819af9d057a8..70f6e7f073b0 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ +#ifdef CONFIG_64BIT +#define LL_INSN "lld" +#define SC_INSN "scd" +#else /* CONFIG_32BIT */ +#define LL_INSN "ll" +#define SC_INSN "sc" +#endif + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + " .set push\n" + " .set noreorder\n" + "1: " LL_INSN " %[tmp], %[buddy]\n" + " bnez %[tmp], 2f\n" + " or %[tmp], %[tmp], %[global]\n" + " " SC_INSN " %[tmp], %[buddy]\n" + " beqz %[tmp], 1b\n" + " nop\n" + "2:\n" + " .set pop" + : [buddy] "+m" (buddy->pte), + [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } #endif } diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 28d6d9364bd1..a71da576883c 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -152,6 +152,31 @@ .set noreorder bltz k0, 8f move k1, sp +#ifdef CONFIG_EVA + /* + * Flush interAptiv's Return Prediction Stack (RPS) by writing + * EntryHi. Toggling Config7.RPS is slower and less portable. + * + * The RPS isn't automatically flushed when exceptions are + * taken, which can result in kernel mode speculative accesses + * to user addresses if the RPS mispredicts. That's harmless + * when user and kernel share the same address space, but with + * EVA the same user segments may be unmapped to kernel mode, + * even containing sensitive MMIO regions or invalid memory. + * + * This can happen when the kernel sets the return address to + * ret_from_* and jr's to the exception handler, which looks + * more like a tail call than a function call. If nested calls + * don't evict the last user address in the RPS, it will + * mispredict the return and fetch from a user controlled + * address into the icache. + * + * More recent EVA-capable cores with MAAR to restrict + * speculative accesses aren't affected. + */ + MFC0 k0, CP0_ENTRYHI + MTC0 k0, CP0_ENTRYHI +#endif .set reorder /* Called from user mode, new stack. */ get_saved_sp diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 3e4491aa6d6b..789d7bf4fef3 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { unsigned int real_len; - cpumask_t mask; + cpumask_t allowed, mask; int retval; struct task_struct *p; @@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: read_unlock(&tasklist_lock); diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index 74bab9ddd0e1..c6bbf2165051 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -24,7 +24,7 @@ LEAF(relocate_new_kernel) process_entry: PTR_L s2, (s0) - PTR_ADD s0, s0, SZREG + PTR_ADDIU s0, s0, SZREG /* * In case of a kdump/crash kernel, the indirection page is not @@ -61,9 +61,9 @@ copy_word: /* copy page word by word */ REG_L s5, (s2) REG_S s5, (s4) - PTR_ADD s4, s4, SZREG - PTR_ADD s2, s2, SZREG - LONG_SUB s6, s6, 1 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word b process_entry diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index ad4d44635c76..a6f6b762c47a 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -80,7 +80,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_64_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 446cc654da56..4b2010654c46 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -72,7 +72,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_N32_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a01..5d7f2634996f 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d2d1c1933bc9..5f5f44edc77d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task, void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; + mm_segment_t old_fs = get_fs(); if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; @@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp) prepare_frametrace(®s); } } + /* + * show_stack() deals exclusively with kernel mode, so be sure to access + * the stack in the kernel (not user) address space. + */ + set_fs(KERNEL_DS); show_stacktrace(task, ®s); + set_fs(old_fs); } static void show_code(unsigned int __user *pc) @@ -1518,6 +1525,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); int multi_match = regs->cp0_status & ST0_TS; enum ctx_state prev_state; + mm_segment_t old_fs = get_fs(); prev_state = exception_enter(); show_regs(regs); @@ -1539,8 +1547,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs) dump_tlb_all(); } + if (!user_mode(regs)) + set_fs(KERNEL_DS); + show_code((unsigned int __user *) regs->cp0_epc); + set_fs(old_fs); + /* * Some chips may have other causes of machine check (e.g. SB1 * graduation timer) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index af84bef0c90d..eb3efd137fd1 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -438,7 +438,7 @@ do { \ : "memory"); \ } while(0) -#define StoreDW(addr, value, res) \ +#define _StoreDW(addr, value, res) \ do { \ __asm__ __volatile__ ( \ ".set\tpush\n\t" \ diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 6ab10573490d..d01ade63492f 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -466,6 +466,7 @@ int get_c0_perfcount_int(void) { return ltq_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index 22f04ca2ff3e..2efb18aafa4f 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 6983fcd48131..2b95e34fa9e8 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1137,7 +1137,7 @@ emul: break; case mfhc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd -> gpr[rt] */ @@ -1148,7 +1148,7 @@ emul: break; case mthc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd <- gpr[rt] */ @@ -1181,6 +1181,24 @@ emul: } break; + case bc1eqz_op: + case bc1nez_op: + if (!cpu_has_mips_r6 || delay_slot(xcp)) + return SIGILL; + + cond = likely = 0; + switch (MIPSInst_RS(ir)) { + case bc1eqz_op: + if (get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1) + cond = 1; + break; + case bc1nez_op: + if (!(get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)) + cond = 1; + break; + } + goto branch_common; + case bc_op: if (delay_slot(xcp)) return SIGILL; @@ -1207,7 +1225,7 @@ emul: case bct_op: break; } - +branch_common: set_delay_slot(xcp); if (cond) { /* diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 609d1241b0c4..371eec113659 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..a7f7d9ffb402 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -148,6 +148,7 @@ int get_c0_perfcount_int(void) return mips_cpu_perf_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { @@ -165,14 +166,17 @@ unsigned int get_c0_compare_int(void) static void __init init_rtc(void) { - /* stop the clock whilst setting it up */ - CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL); + unsigned char freq, ctrl; - /* 32KHz time base */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); + /* Set 32KHz time base if not already set */ + freq = CMOS_READ(RTC_FREQ_SELECT); + if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ) + CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); - /* start the clock */ - CMOS_WRITE(RTC_24H, RTC_CONTROL); + /* Ensure SET bit is clear so RTC can run */ + ctrl = CMOS_READ(RTC_CONTROL); + if (ctrl & RTC_SET) + CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL); } void __init plat_time_init(void) diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index e1d69895fb1d..a120b7a5a8fe 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -77,6 +77,7 @@ int get_c0_perfcount_int(void) return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; return -1; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..ab73f6f405bb 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -26,6 +26,7 @@ int get_c0_perfcount_int(void) { return gic_get_c0_perfcount_int(); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); void __init plat_time_init(void) { diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 7cf91b92e9d1..199ace4ca1ad 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -89,6 +89,7 @@ int get_c0_perfcount_int(void) { return rt_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index f3191db6e2e9..c0eab24f6a9e 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *regs) struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); -#ifdef CONFIG_SMP struct irq_desc *desc; +#ifdef CONFIG_SMP cpumask_t dest; #endif @@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; irq = eirr_to_irq(eirr_val); -#ifdef CONFIG_SMP + /* Filter out spurious interrupts, mostly from serial port at bootup */ desc = irq_to_desc(irq); + if (unlikely(!desc->action)) + goto set_out; + +#ifdef CONFIG_SMP cpumask_copy(&dest, desc->irq_data.affinity); if (irqd_is_per_cpu(&desc->irq_data) && !cpumask_test_cpu(smp_processor_id(), &dest)) { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 7ef22e3387e0..0b8d26d3ba43 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -821,7 +821,7 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT 19: ldd,ma 0(%sr3,%r26), %r29 - sub,= %r29, %r25, %r0 + sub,*= %r29, %r25, %r0 b,n cas2_end 20: std,ma %r24, 0(%sr3,%r26) copy %r0, %r28 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8e..4eec430d8fa8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 43e6ad424c7f..88d27e3258d2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -135,7 +135,19 @@ #define pte_iterate_hashed_end() } while(0) #ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) \ + ({ \ + unsigned int psize; \ + if (is_kernel_addr(addr)) \ + psize = MMU_PAGE_4K; \ + else \ + psize = get_slice_psize(mm, addr); \ + psize; \ + }) #else #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K #endif diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 7a4ede16b283..b77ef369c0f0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -343,6 +343,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 58abeda64cb7..15cca17cba4b 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -29,6 +29,7 @@ static inline void save_early_sprs(struct thread_struct *prev) {} extern void enable_kernel_fp(void); extern void enable_kernel_altivec(void); +extern void enable_kernel_vsx(void); extern int emulate_altivec(struct pt_regs *); extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9ee61d15653d..cb565ad0a5b6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -310,11 +310,26 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) if (!(pe->type & EEH_PE_PHB)) { if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + + /* + * The config space of some PCI devices can't be accessed + * when their PEs are in frozen state. Otherwise, fenced + * PHB might be seen. Those PEs are identified with flag + * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED + * is set automatically when the PE is put to EEH_PE_ISOLATED. + * + * Restoring BARs possibly triggers PCI config access in + * (OPAL) firmware and then causes fenced PHB. If the + * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's + * pointless to restore BARs and dump config space. + */ eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); + if (!(pe->state & EEH_PE_CFG_BLOCKED)) { + eeh_pe_restore_bars(pe); - pci_regs_buf[0] = 0; - eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + pci_regs_buf[0] = 0; + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + } } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); @@ -1118,9 +1133,6 @@ void eeh_add_device_late(struct pci_dev *dev) return; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - /* * The EEH cache might not be removed correctly because of * unbalanced kref to the device during unplug time, which @@ -1144,6 +1156,9 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = NULL; } + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) + eeh_ops->probe(pdn, NULL); + edev->pdev = dev; dev->dev.archdata.edev = edev; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febb50dd5328..0596373cd1c3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -204,8 +204,6 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -#if 0 -/* not currently used, but some crazy RAID module might want to later */ void enable_kernel_vsx(void) { WARN_ON(preemptible()); @@ -220,7 +218,6 @@ void enable_kernel_vsx(void) #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_vsx); -#endif void giveup_vsx(struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7a488c108410..caffb10e7aa3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -584,6 +584,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f92..da50e0c9c57e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 453a8a47a467..964c0ce584ce 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -826,12 +826,15 @@ int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) unsigned long size = kvmppc_get_gpr(vcpu, 4); unsigned long addr = kvmppc_get_gpr(vcpu, 5); u64 buf; + int srcu_idx; int ret; if (!is_power_of_2(size) || (size > sizeof(buf))) return H_TOO_HARD; + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; @@ -866,6 +869,7 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) unsigned long addr = kvmppc_get_gpr(vcpu, 5); unsigned long val = kvmppc_get_gpr(vcpu, 6); u64 buf; + int srcu_idx; int ret; switch (size) { @@ -889,7 +893,9 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) return H_TOO_HARD; } + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b027a89737b6..c6d601cc9764 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -421,14 +421,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - u64 pte1; - - pte1 = be64_to_cpu(hpte[1]); hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, pte1, pte_index); + rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); - /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, pte1); + /* + * The reference (R) and change (C) bits in a HPT + * entry can be set by hardware at any time up until + * the HPTE is invalidated and the TLB invalidation + * sequence has completed. This means that when + * removing a HPTE, we need to re-read the HPTE after + * the invalidation sequence has completed in order to + * obtain reliable values of R and C. + */ + remove_revmap_chain(kvm, pte_index, rev, v, + be64_to_cpu(hpte[1])); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4d70df26c402..ffd98b2bfa16 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1127,6 +1127,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 beq 4f b guest_exit_cont 3: @@ -1170,6 +1171,7 @@ mc_cont: bl kvmhv_accumulate_time #endif + mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46..4d87122cf6a7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e4b6..3ff29cf6d05c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 02e4a1745516..3b6647e574b6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -189,7 +189,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index df6a7041922b..e6e8b241d717 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -268,6 +268,11 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act eeh_dev_init(PCI_DN(np), pci->phb); } break; + case OF_RECONFIG_DETACH_NODE: + pci = PCI_DN(np); + if (pci) + list_del(&pci->list); + break; default: err = NOTIFY_DONE; break; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index f086c6f22dc9..fd16cb5d83f3 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index a3f660eed6de..89496cf4e04d 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -65,6 +65,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -72,10 +73,11 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index b2cef1809389..13a34b237559 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 6e2e6aa378bb..02a137daa182 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index d4788111c161..fac6ac9790fa 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 42506b371b74..4da604ebf6fd 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -167,7 +167,7 @@ unsigned long decompress_kernel(void) #endif puts("Uncompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); return (unsigned long) output; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fe8d6924efaa..c78ba51ae285 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -48,6 +48,19 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; +static inline void sigset_to_sigset32(unsigned long *set64, + compat_sigset_word *set32) +{ + set32[0] = (compat_sigset_word) set64[0]; + set32[1] = (compat_sigset_word)(set64[0] >> 32); +} + +static inline void sigset32_to_sigset(compat_sigset_word *set32, + unsigned long *set64) +{ + set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -303,10 +316,12 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; @@ -323,10 +338,12 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset))) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -397,7 +414,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; @@ -458,6 +475,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -505,11 +523,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. */ + sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7262fe438c99..1942f22e6694 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -683,7 +683,7 @@ static void __init setup_memory(void) /* * Setup hardware capabilities. */ -static void __init setup_hwcaps(void) +static int __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; struct cpuid cpu_id; @@ -749,9 +749,11 @@ static void __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_TE; /* - * Vector extension HWCAP_S390_VXRS is bit 11. + * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension + * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX + * instead of facility bit 129. */ - if (test_facility(129)) + if (MACHINE_HAS_VX) elf_hwcap |= HWCAP_S390_VXRS; get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); @@ -788,7 +790,9 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + return 0; } +arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -871,11 +875,6 @@ void __init setup_arch(char **cmdline_p) cpu_init(); /* - * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). - */ - setup_hwcaps(); - - /* * Create kernel page tables and switch to virtual addressing. */ paging_init(); diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index 95470a472d2c..208a9753ab38 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -132,7 +132,7 @@ void decompress_kernel(void) puts("Uncompressing Linux... "); cache_control(CACHE_ENABLE); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); cache_control(CACHE_DISABLE); puts("Ok, booting the kernel.\n"); } diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 1f0aa2024e94..6424249d5f78 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -28,16 +28,10 @@ * Must preserve %o5 between VISEntryHalf and VISExitHalf */ #define VISEntryHalf \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,pt %icc, 297f; \ - sethi %hi(298f), %g7; \ - sethi %hi(VISenterhalf), %g1; \ - jmpl %g1 + %lo(VISenterhalf), %g0; \ - or %g7, %lo(298f), %g7; \ - clr %o5; \ -297: wr %o5, FPRS_FEF, %fprs; \ -298: + VISEntry + +#define VISExitHalf \ + VISExit #define VISEntryHalfFast(fail_label) \ rd %fprs, %o5; \ @@ -47,7 +41,7 @@ ba,a,pt %xcc, fail_label; \ 297: wr %o5, FPRS_FEF, %fprs; -#define VISExitHalf \ +#define VISExitHalfFast \ wr %o5, 0, %fprs; #ifndef __ASSEMBLY__ diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 140527a20e7d..83aeeb1dffdb 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -240,8 +240,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e..a063d84336d6 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb %o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1d649a95660c..8069ce12f20b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cd..c667e104a0c2 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c index 176d5bda3559..5c65dfee278c 100644 --- a/arch/unicore32/boot/compressed/misc.c +++ b/arch/unicore32/boot/compressed/misc.c @@ -119,8 +119,8 @@ unsigned long decompress_kernel(unsigned long output_start, output_ptr = get_unaligned_le32(tmp); arch_decomp_puts("Uncompressing Linux..."); - decompress(input_data, input_data_end - input_data, NULL, NULL, - output_data, NULL, error); + __decompress(input_data, input_data_end - input_data, NULL, NULL, + output_data, 0, NULL, error); arch_decomp_puts(" done, booting the kernel.\n"); return output_ptr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a107b935e22f..e28437e0f708 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -424,7 +424,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); parse_elf(output); /* * 32-bit always performs relocations. 64-bit relocations are only diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd..daf8d2b9a217 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -294,6 +294,7 @@ static struct ahash_alg ghash_async_alg = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", .cra_priority = 400, + .cra_ctxsize = sizeof(struct ghash_async_ctx), .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd2647..4e10d73cf018 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b4..364d27481a52 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,7 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; + struct ldt_struct *ldt; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index e997f70f80c4..80d67dd80351 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) {} #endif /* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + +static inline void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} + +/* * Used for LDT copy/destruction. */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); @@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); } #ifdef CONFIG_SMP else { @@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c998..9dfce4e0417d 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd3..d8b9f9081e86 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dbe76a14c3c9..07bea80223f6 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -489,6 +489,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..d1918a8c4393 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -325,10 +325,15 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28f..307a49828826 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -1424,7 +1431,7 @@ static inline void __x2apic_disable(void) { u64 msr; - if (cpu_has_apic) + if (!cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1483,10 +1490,13 @@ void x2apic_setup(void) static __init void x2apic_disable(void) { - u32 x2apic_id; + u32 x2apic_id, state = x2apic_state; + + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; - if (x2apic_state != X2APIC_ON) - goto out; + if (state != X2APIC_ON) + return; x2apic_id = read_apic_id(); if (x2apic_id >= 255) @@ -1494,9 +1504,6 @@ static __init void x2apic_disable(void) __x2apic_disable(); register_lapic_address(mp_lapic_addr); -out: - x2apic_state = X2APIC_DISABLED; - x2apic_mode = 0; } static __init void x2apic_enable(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..205e0f3df501 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1434,7 +1434,7 @@ void cpu_init(void) load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1483,7 +1483,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030ed..e166d833cf63 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -116,6 +116,27 @@ void mce_intel_hcpu_update(unsigned long cpu) per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; } +static void cmci_toggle_interrupt_mode(bool on) +{ + unsigned long flags, *owned; + int bank; + u64 val; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = this_cpu_ptr(mce_banks_owned); + for_each_set_bit(bank, owned, MAX_NR_BANKS) { + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + + if (on) + val |= MCI_CTL2_CMCI_EN; + else + val &= ~MCI_CTL2_CMCI_EN; + + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + } + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + unsigned long cmci_intel_adjust_timer(unsigned long interval) { if ((this_cpu_read(cmci_backoff_cnt) > 0) && @@ -145,7 +166,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) */ if (!atomic_read(&cmci_storm_on_cpus)) { __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); - cmci_reenable(); + cmci_toggle_interrupt_mode(true); cmci_recheck(); } return CMCI_POLL_INTERVAL; @@ -156,22 +177,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) } } -static void cmci_storm_disable_banks(void) -{ - unsigned long flags, *owned; - int bank; - u64 val; - - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = this_cpu_ptr(mce_banks_owned); - for_each_set_bit(bank, owned, MAX_NR_BANKS) { - rdmsrl(MSR_IA32_MCx_CTL2(bank), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - static bool cmci_storm_detect(void) { unsigned int cnt = __this_cpu_read(cmci_storm_cnt); @@ -193,7 +198,7 @@ static bool cmci_storm_detect(void) if (cnt <= CMCI_STORM_THRESHOLD) return false; - cmci_storm_disable_banks(); + cmci_toggle_interrupt_mode(false); __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); r = atomic_add_return(1, &cmci_storm_on_cpus); mce_timer_kick(CMCI_STORM_INTERVAL); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index aa4e3a74e541..4cc98a4e8ea9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2170,21 +2170,25 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2813ea0f142e..22212615a137 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2098,9 +2098,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1..403ace539b73 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -184,10 +184,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -213,7 +212,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478d..6c9cb6073832 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -793,8 +793,6 @@ retint_kernel: restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 - -irq_return: INTERRUPT_RETURN ENTRY(native_iret) @@ -1395,7 +1393,18 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) INTR_FRAME + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. @@ -1413,11 +1422,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1426,32 +1436,154 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as our temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. + */ + + SWAPGS_UNSAFE_STACK + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + jmp restore_c_regs_and_iret + +.Lnmi_from_kernel: + /* + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: /* - * Check the special variable on the stack to see if NMIs are - * executing. + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1462,25 +1594,21 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ jb first_nmi - /* Ah, it is within the NMI stack, treat it as nested */ + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ CFI_REMEMBER_STATE nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1499,60 +1627,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1560,6 +1651,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1568,16 +1660,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1588,9 +1685,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK @@ -1605,29 +1702,11 @@ end_repeat_nmi: call paranoid_entry DEFAULT_FRAME 0 - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: @@ -1635,12 +1714,27 @@ nmi_swapgs: nmi_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS - /* Pop the extra iret frame at once */ + + /* Point RSP at the "iret" frame. */ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) - jmp irq_return + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ + INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cc..2bcc0525f1c1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -20,82 +21,82 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. */ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. */ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751c..d05bd2e2ee91 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -461,49 +460,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. + * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c614dd492f5f..1f316f066c49 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,10 +41,18 @@ #include <asm/timer.h> #include <asm/special_insns.h> -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc0..971743774248 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static void mwait_idle(void) { if (!current_set_polling_and_test()) { + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -464,6 +465,7 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..58e02d938218 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } } @@ -499,27 +499,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933..e0fd5f47fbb9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -450,19 +456,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d0..0ccb53a9fcd9 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/ptrace.h> #include <asm/desc.h> +#include <asm/mmu_context.h> unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -27,13 +28,14 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + seg >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..21187ebee7d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include <asm/hypervisor.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/geode.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1004,15 +1005,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e7..c4ea87eedf8a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -150,7 +150,7 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return vcpu->arch.apic->pending_events; + return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b73337634214..554e877e0bc4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -478,23 +472,6 @@ retry: return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. */ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -3343,21 +3320,6 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) return vcpu_match_mmio_gva(vcpu, addr); } - -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. - */ -static bool check_direct_spte_mmio_pf(u64 spte) -{ - return __check_direct_spte_mmio_pf(spte); -} - static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) { struct kvm_shadow_walk_iterator iterator; @@ -3400,13 +3362,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) } /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4911bf19122b..7858cd9acfe4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -512,7 +512,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0d31..bc3041e1abbc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6144,6 +6144,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea306adbbc13..47a32f743a91 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2192,7 +2192,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; - kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); + adjust_tsc_offset_guest(vcpu, adj); } vcpu->arch.ia32_tsc_adjust_msr = data; } diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d..274a52b1183e 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -29,7 +29,6 @@ #include <asm/uaccess.h> #include <asm/traps.h> -#include <asm/desc.h> #include <asm/user.h> #include <asm/i387.h> @@ -185,7 +184,7 @@ void math_emulate(struct math_emu_info *info) math_abort(FPU_info, SIGILL); } - code_descriptor = LDT_DESCRIPTOR(FPU_CS); + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); if (SEG_D_SIZE(code_descriptor)) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f3..d342fce49447 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,9 +16,24 @@ #include <linux/kernel.h> #include <linux/mm.h> -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#include <asm/desc.h> +#include <asm/mmu_context.h> + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(¤t->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(¤t->mm->context.lock); +#endif + return ret; +} + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f9..8300db71c2a6 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -20,7 +20,6 @@ #include <linux/stddef.h> #include <asm/uaccess.h> -#include <asm/desc.h> #include "fpu_system.h" #include "exception.h" @@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + descriptor = FPU_get_ldt_descriptor(addr->selector); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c8140e12816a..c23ab1ee3a9a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,7 @@ page_table_range_init_count(unsigned long start, unsigned long end) vaddr = start; pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3fba623e3ba5..f9977a7a9444 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 841ea05e1b02..477384985ac9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -679,6 +679,70 @@ out: } /* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + +/* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. */ @@ -688,7 +752,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1..bf9384488399 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -23,6 +23,7 @@ #include <asm/debugreg.h> #include <asm/fpu-internal.h> /* pcntxt_mask */ #include <asm/cpu.h> +#include <asm/mmu_context.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -154,7 +155,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ } /** diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e88fda867a33..484145368a24 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,7 +8,7 @@ config XEN select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_TSC + depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -17,7 +17,7 @@ config XEN config XEN_DOM0 def_bool y depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + depends on X86_IO_APIC && ACPI && PCI config XEN_PVHVM def_bool y diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337a..4b6e29ac0968 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..0cc657160cb6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include <linux/memblock.h> #include <linux/edd.h> +#ifdef CONFIG_KEXEC_CORE +#include <linux/kexec.h> +#endif + #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/xen.h> @@ -483,6 +487,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +497,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +534,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +543,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } @@ -1758,6 +1802,21 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC_CORE +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -1777,6 +1836,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC_CORE + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } #endif diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..bef30cbb56c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 677bfcf4ee5d..28f33a8b7f5f 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -25,30 +25,39 @@ static inline void spill_registers(void) { #if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( - " call12 1f\n" + " call8 1f\n" " _j 2f\n" " retw\n" " .align 4\n" "1:\n" +#if XCHAL_NUM_AREGS == 32 + " _entry a1, 32\n" + " addi a8, a0, 3\n" + " _entry a1, 16\n" + " mov a12, a12\n" + " retw\n" +#else " _entry a1, 48\n" - " addi a12, a0, 3\n" -#if XCHAL_NUM_AREGS > 32 - " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n" + " call12 1f\n" + " retw\n" + " .align 4\n" + "1:\n" + " .rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n" " _entry a1, 48\n" " mov a12, a0\n" " .endr\n" -#endif - " _entry a1, 48\n" + " _entry a1, 16\n" #if XCHAL_NUM_AREGS % 12 == 0 - " mov a8, a8\n" -#elif XCHAL_NUM_AREGS % 12 == 4 " mov a12, a12\n" -#elif XCHAL_NUM_AREGS % 12 == 8 +#elif XCHAL_NUM_AREGS % 12 == 4 " mov a4, a4\n" +#elif XCHAL_NUM_AREGS % 12 == 8 + " mov a8, a8\n" #endif " retw\n" +#endif "2:\n" - : : : "a12", "a13", "memory"); + : : : "a8", "a9", "memory"); #else __asm__ __volatile__ ( " mov a12, a12\n" diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 82bbfa5a05b3..a2a902140c4e 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -568,12 +568,13 @@ user_exception_exit: * (if we have restored WSBITS-1 frames). */ +2: #if XCHAL_HAVE_THREADPTR l32i a3, a1, PT_THREADPTR wur a3, threadptr #endif -2: j common_exception_exit + j common_exception_exit /* This is the kernel exception exit. * We avoided to do a MOVSP when we entered the exception, but we @@ -1820,7 +1821,7 @@ ENDPROC(system_call) mov a12, a0 .endr #endif - _entry a1, 48 + _entry a1, 16 #if XCHAL_NUM_AREGS % 12 == 0 mov a8, a8 #elif XCHAL_NUM_AREGS % 12 == 4 @@ -1844,7 +1845,7 @@ ENDPROC(system_call) ENTRY(_switch_to) - entry a1, 16 + entry a1, 48 mov a11, a3 # and 'next' (a3) |