From f0573e6db1d1e637e20011f40264b2f5b5880587 Mon Sep 17 00:00:00 2001 From: Alberto Panizzo Date: Mon, 1 Nov 2010 18:00:03 +0100 Subject: mach-pcm037_eet: Fix section mismatch for eet_init_devices() This function should be marked as __init because it is used only in the init phase. This fix the compiler warning: LD arch/arm/mach-mx3/built-in.o WARNING: arch/arm/mach-mx3/built-in.o(.text+0x1328): Section mismatch in reference from the function eet_init_devices() to the (unknown reference) .init.rodata:(unknown) The function eet_init_devices() references the (unknown reference) __initconst (unknown). This is often because eet_init_devices lacks a __initconst annotation or the annotation of (unknown) is wrong. Signed-off-by: Alberto Panizzo Signed-off-by: Sascha Hauer --- arch/arm/mach-mx3/mach-pcm037_eet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c index 99e0894e07db..3392812a55f7 100644 --- a/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/arch/arm/mach-mx3/mach-pcm037_eet.c @@ -171,7 +171,7 @@ static struct platform_device pcm037_gpio_keys_device = { }, }; -static int eet_init_devices(void) +static int __init eet_init_devices(void) { if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) return 0; -- cgit v1.2.3 From 25591b07033663e09f5e60355fc5ec4d4aa53e63 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:51 +0100 Subject: [S390] fix get_user_pages_fast The check for the _PAGE_RO bit in get_user_pages_fast for write==1 is the wrong way around. It must not be set for the fast path. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/gup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 38e641cdd977..45b405ca2567 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -20,18 +20,17 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask, result; + unsigned long mask; pte_t *ptep, pte; struct page *page; - result = write ? 0 : _PAGE_RO; - mask = result | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { pte = *ptep; barrier(); - if ((pte_val(pte) & mask) != result) + if ((pte_val(pte) & mask) != 0) return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); -- cgit v1.2.3 From ec6743bb06510c7b629603ce35713d6ae9273579 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 10 Nov 2010 10:05:55 +0100 Subject: [S390] mm: add devmem_is_allowed() for STRICT_DEVMEM checking Provide the devmem_is_allowed() routine to restrict access to kernel memory from userspace. Set the CONFIG_STRICT_DEVMEM config option to switch on checking. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig.debug | 12 ++++++++++++ arch/s390/include/asm/page.h | 5 +++++ 2 files changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 45e0c6199f36..05221b13ffb1 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -6,6 +6,18 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config STRICT_DEVMEM + def_bool y + prompt "Filter access to /dev/mem" + ---help--- + This option restricts access to /dev/mem. If this option is + disabled, you allow userspace access to all memory, including + kernel and userspace memory. Accidental memory access is likely + to be disastrous. + Memory access is required for experts who want to debug the kernel. + + If you are unsure, say Y. + config DEBUG_STRICT_USER_COPY_CHECKS bool "Strict user copy size checks" ---help--- diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a8729ea7e9ac..3c987e9ec8d6 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -130,6 +130,11 @@ struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +static inline int devmem_is_allowed(unsigned long pfn) +{ + return 0; +} + #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -- cgit v1.2.3 From adb45839817392102e659c19e5c19aa39530021f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:57 +0100 Subject: [S390] kprobes: disable interrupts throughout Execute the kprobe exception and fault handler with interrupts disabled. To disable the interrupts only while a single step is in progress is not good enough, a kprobe from interrupt context while another kprobe is handled can confuse the internal house keeping. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d60fc4398516..70cf73bdba25 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -30,6 +30,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -212,7 +213,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) /* Set the PER control regs, turns on single step for this address */ __ctl_load(kprobe_per_regs, 9, 11); regs->psw.mask |= PSW_MASK_PER; - regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); } static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -239,7 +240,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __get_cpu_var(current_kprobe) = p; /* Save the interrupt and per flags */ kcb->kprobe_saved_imask = regs->psw.mask & - (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); /* Save the control regs that govern PER */ __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } @@ -316,8 +317,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 1; ss_probe: - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_disable(); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; return 1; @@ -465,8 +464,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) goto out; } reset_current_kprobe(); - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_enable(); out: preempt_enable_no_resched(); @@ -482,7 +479,7 @@ out: return 1; } -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -508,8 +505,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else { reset_current_kprobe(); - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_enable(); } preempt_enable_no_resched(); break; @@ -553,6 +548,18 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} + /* * Wrapper routine to for handling exceptions. */ @@ -560,8 +567,12 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + struct pt_regs *regs = args->regs; int ret = NOTIFY_DONE; + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) @@ -572,16 +583,17 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_TRAP: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - preempt_enable(); break; default: break; } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; } @@ -595,6 +607,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); /* r14 is the function return address */ kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; -- cgit v1.2.3 From 89480801a17a3069f45169d40b828c8e511aa005 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:58 +0100 Subject: [S390] kprobes: Fix the return address of multiple kretprobes Analog to git commit 737480a0d525dae13306296da08029dff545bc72 fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 70cf73bdba25..2564793ec2b6 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -349,6 +349,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -371,10 +372,32 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) - ri->rp->handler(ri, regs); + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; orig_ret_address = (unsigned long)ri->ret_addr; + + if (ri->rp && ri->rp->handler) { + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + } + recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) { @@ -386,7 +409,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, break; } } - kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); -- cgit v1.2.3 From b5908548537ccd3ada258ca5348df7ffc93e5a06 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:29:49 -0500 Subject: tracing: Force arch_local_irq_* notrace for paravirt When running ktest.pl randconfig tests, I would sometimes trigger a lockdep annotation bug (possible reason: unannotated irqs-on). This triggering happened right after function tracer self test was executed. After doing a config bisect I found that this was caused with having function tracer, paravirt guest, prove locking, and rcu torture all enabled. The rcu torture just enhanced the likelyhood of triggering the bug. Prove locking was needed, since it was the thing that was bugging. Function tracer would trace and disable interrupts in all sorts of funny places. paravirt guest would turn arch_local_irq_* into functions that would be traced. Besides the fact that tracing arch_local_irq_* is just a bad idea, this is what is happening. The bug happened simply in the local_irq_restore() code: if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ } else { \ trace_hardirqs_on(); \ raw_local_irq_restore(flags); \ } \ The raw_local_irq_restore() was defined as arch_local_irq_restore(). Now imagine, we are about to enable interrupts. We go into the else case and call trace_hardirqs_on() which tells lockdep that we are enabling interrupts, so it sets the current->hardirqs_enabled = 1. Then we call raw_local_irq_restore() which calls arch_local_irq_restore() which gets traced! Now in the function tracer we disable interrupts with local_irq_save(). This is fine, but flags is stored that we have interrupts disabled. When the function tracer calls local_irq_restore() it does it, but this time with flags set as disabled, so we go into the if () path. This keeps interrupts disabled and calls trace_hardirqs_off() which sets current->hardirqs_enabled = 0. When the tracer is finished and proceeds with the original code, we enable interrupts but leave current->hardirqs_enabled as 0. Which now breaks lockdeps internal processing. Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/include/asm/paravirt.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a8709f..ef9975812c77 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } -static inline void arch_local_irq_restore(unsigned long f) +static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_disable); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_enable); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; -- cgit v1.2.3 From e060e7af98182494b764d002eba7fa022fe91bdf Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 11 Nov 2010 12:37:43 -0800 Subject: xen: set vma flag VM_PFNMAP in the privcmd mmap file_op Set VM_PFNMAP in the privcmd mmap file_op, rather than later in xen_remap_domain_mfn_range when it is too late because vma_wants_writenotify has already been called and vm_page_prot has already been modified. Signed-off-by: Stefano Stabellini Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f08ea045620f..792de4349c79 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2299,7 +2299,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == + (VM_PFNMAP | VM_RESERVED | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; -- cgit v1.2.3 From 6c0aca288e726405b01dacb12cac556454d34b2a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Nov 2010 21:18:43 +0100 Subject: x86: Ignore trap bits on single step exceptions When a single step exception fires, the trap bits, used to signal hardware breakpoints, are in a random state. These trap bits might be set if another exception will follow, like a breakpoint in the next instruction, or a watchpoint in the previous one. Or there can be any junk there. So if we handle these trap bits during the single step exception, we are going to handle an exception twice, or we are going to handle junk. Just ignore them in this case. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=21332 Reported-by: Michael Stefaniuc Signed-off-by: Frederic Weisbecker Cc: Rafael J. Wysocki Cc: Maciej Rutecki Cc: Alexandre Julliard Cc: Jason Wessel Cc: All since 2.6.33.x --- arch/x86/kernel/hw_breakpoint.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9dcc25d..42c594254507 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; + /* If it's a single step, TRAP bits are random */ + if (dr6 & DR_STEP) + return NOTIFY_DONE; + /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; -- cgit v1.2.3 From 7e77506a5918d82cafa2ffa783ab57c23f9e9817 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 30 Sep 2010 12:37:26 +0100 Subject: xen: implement XENMEM_machphys_mapping This hypercall allows Xen to specify a non-default location for the machine to physical mapping. This capability is used when running a 32 bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to exactly the size required. [ Impact: add Xen hypercall definitions ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- arch/x86/include/asm/xen/interface.h | 6 +++--- arch/x86/include/asm/xen/interface_32.h | 5 +++++ arch/x86/include/asm/xen/interface_64.h | 13 +------------ arch/x86/include/asm/xen/page.h | 7 ++++--- arch/x86/xen/enlighten.c | 7 +++++++ arch/x86/xen/mmu.c | 14 ++++++++++++++ 6 files changed, 34 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c55..1c10c88ee4e1 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5c..8413688b2571 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97c..839a4811cf98 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d5..8760cc60a21c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..bd3554934613 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1097,6 +1102,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..bd2713a82571 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { -- cgit v1.2.3 From 415d34195b3c0c26544034d37b8766dfffd36bcf Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 15:43:11 +0100 Subject: ARM i.MX: sdma is merged, so remove #ifdef SDMA_IS_MERGED Signed-off-by: Sascha Hauer --- arch/arm/plat-mxc/devices/platform-imx-dma.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-mxc/devices/platform-imx-dma.c b/arch/arm/plat-mxc/devices/platform-imx-dma.c index 02d989018059..3a705c7877dd 100644 --- a/arch/arm/plat-mxc/devices/platform-imx-dma.c +++ b/arch/arm/plat-mxc/devices/platform-imx-dma.c @@ -12,15 +12,7 @@ #include #include -#ifdef SDMA_IS_MERGED #include -#else -struct sdma_platform_data { - int sdma_version; - char *cpu_name; - int to_version; -}; -#endif struct imx_imx_sdma_data { resource_size_t iobase; -- cgit v1.2.3 From 8cad8fa1d716b16aa22d5c670ead2b952e1e59b9 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 9 Aug 2010 09:45:09 +0200 Subject: ARM i.MX pcm037 eet: compile fixes The pcm037 eet extension currently does not compile if SPI_IMX is enabled. Fix it. Signed-off-by: Sascha Hauer --- arch/arm/mach-mx3/mach-pcm037_eet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c index 3392812a55f7..fda56545d2fd 100644 --- a/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/arch/arm/mach-mx3/mach-pcm037_eet.c @@ -14,6 +14,7 @@ #include #include +#include #include @@ -59,14 +60,12 @@ static struct spi_board_info pcm037_spi_dev[] = { }; /* Platform Data for MXC CSPI */ -#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)}; static const struct spi_imx_master pcm037_spi1_pdata __initconst = { .chipselect = pcm037_spi1_cs, .num_chipselect = ARRAY_SIZE(pcm037_spi1_cs), }; -#endif /* GPIO-keys input device */ static struct gpio_keys_button pcm037_gpio_keys[] = { -- cgit v1.2.3 From 3530b417f457627432cff1dfd8db659042d66695 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 23:08:59 +0100 Subject: ARM i.MX spi: fix compilation for i.MX21 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sascha Hauer Acked-by: Uwe Kleine-König --- arch/arm/plat-mxc/devices/platform-spi_imx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/plat-mxc/devices/platform-spi_imx.c b/arch/arm/plat-mxc/devices/platform-spi_imx.c index e48340ec331e..17f724c9452d 100644 --- a/arch/arm/plat-mxc/devices/platform-spi_imx.c +++ b/arch/arm/plat-mxc/devices/platform-spi_imx.c @@ -27,6 +27,7 @@ const struct imx_spi_imx_data imx21_cspi_data[] __initconst = { imx_spi_imx_data_entry(MX21, CSPI, "imx21-cspi", _id, _hwid, SZ_4K) imx21_cspi_data_entry(0, 1), imx21_cspi_data_entry(1, 2), +}; #endif #ifdef CONFIG_ARCH_MX25 -- cgit v1.2.3 From 6f5ae900957b73f5d18c70ad69662ca604ff77e1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 21:27:05 +0100 Subject: ARM i.MX27 eukrea: Fix compilation Currently compilation breaks for the eukrea mbimx27 baseboard when CONFIG_SPI_IMX is selected and CONFIG_TOUCHSCREEN_ADS7846 is not selected. Fix this by removing the ifdefs altogether. Signed-off-by: Sascha Hauer Cc: Eric Benard --- arch/arm/mach-imx/eukrea_mbimx27-baseboard.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c index 026263c665ca..7e1e9dc2c8fc 100644 --- a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c @@ -250,9 +250,6 @@ static const struct imxuart_platform_data uart_pdata __initconst = { .flags = IMXUART_HAVE_RTSCTS, }; -#if defined(CONFIG_TOUCHSCREEN_ADS7846) \ - || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) - #define ADS7846_PENDOWN (GPIO_PORTD | 25) static void ads7846_dev_init(void) @@ -273,9 +270,7 @@ static struct ads7846_platform_data ads7846_config __initdata = { .get_pendown_state = ads7846_get_pendown_state, .keep_vref_on = 1, }; -#endif -#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = { [0] = { .modalias = "ads7846", @@ -294,7 +289,6 @@ static const struct spi_imx_master eukrea_mbimx27_spi0_data __initconst = { .chipselect = eukrea_mbimx27_spi_cs, .num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs), }; -#endif static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = { { -- cgit v1.2.3 From 46e3f3075931493f65e9561ef57bcc23fe077a13 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 9 Nov 2010 08:47:54 +0200 Subject: mx25: fix spi device registration typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 35bab0589b9a71533b37280eefa430c21dc102fe (ARM: imx: change the way spi-imx devices are registered) contained a typo in mx25, leading to link time failure. Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer Acked-by: Uwe Kleine-König --- arch/arm/mach-mx25/devices-imx25.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mx25/devices-imx25.h b/arch/arm/mach-mx25/devices-imx25.h index 93afa10b13cf..d94d282fa676 100644 --- a/arch/arm/mach-mx25/devices-imx25.h +++ b/arch/arm/mach-mx25/devices-imx25.h @@ -42,9 +42,9 @@ extern const struct imx_mxc_nand_data imx25_mxc_nand_data __initconst; #define imx25_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) -extern const struct imx_spi_imx_data imx25_spi_imx_data[] __initconst; +extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst; #define imx25_add_spi_imx(id, pdata) \ - imx_add_spi_imx(&imx25_spi_imx_data[id], pdata) + imx_add_spi_imx(&imx25_cspi_data[id], pdata) #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) -- cgit v1.2.3 From 35a96c739fd7624b8edff990a74b86b5a85342da Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 18:18:32 +0900 Subject: sh: clkfwk: Kill off now unused algo_id in set_rate op. Now that clk_set_rate_ex() is gone, there is also no way to get at rate setting algo id, which is now also completely unused. Kill it off before new clock ops start using it. Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 6 ++---- arch/sh/kernel/cpu/sh4/clock-sh4-202.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..b25ce90a346e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -220,8 +220,7 @@ static void pllc2_disable(struct clk *clk) __raw_writel(__raw_readl(PLLC2CR) & ~0x80000000, PLLC2CR); } -static int pllc2_set_rate(struct clk *clk, - unsigned long rate, int algo_id) +static int pllc2_set_rate(struct clk *clk, unsigned long rate) { unsigned long value; int idx; @@ -463,8 +462,7 @@ static int fsidiv_enable(struct clk *clk) return 0; } -static int fsidiv_set_rate(struct clk *clk, - unsigned long rate, int algo_id) +static int fsidiv_set_rate(struct clk *clk, unsigned long rate) { int idx; diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c index 4eabc68cd753..b601fa3978d1 100644 --- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c @@ -110,7 +110,7 @@ static int shoc_clk_verify_rate(struct clk *clk, unsigned long rate) return 0; } -static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int algo_id) +static int shoc_clk_set_rate(struct clk *clk, unsigned long rate) { unsigned long frqcr3; unsigned int tmp; -- cgit v1.2.3 From 24f3f6b5eff92608a62449e33bfac0eed1447d02 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 15 Nov 2010 09:18:49 -0500 Subject: arch/tile: fix rwlock so would-be write lockers don't block new readers This avoids a deadlock in the IGMP code where one core gets a read lock, another core starts trying to get a write lock (thus blocking new readers), and then the first core tries to recursively re-acquire the read lock. We still try to preserve some degree of balance by giving priority to additional write lockers that come along while the lock is held for write, so they can all complete quickly and return the lock to the readers. Signed-off-by: Chris Metcalf --- arch/tile/lib/spinlock_32.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d62c6b..5cd1c4004eca 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) * when we compare them. */ u32 my_ticket_; + u32 iterations = 0; - /* Take out the next ticket; this will also stop would-be readers. */ - if (val & 1) - val = get_rwlock(rwlock); - rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. + */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } - /* Extract my ticket value from the original word. */ + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); my_ticket_ = val >> WR_NEXT_SHIFT; - /* - * Wait until the "current" field matches our ticket, and - * there are no remaining readers. - */ + /* Wait until the "current" field matches our ticket. */ for (;;) { u32 curr_ = val >> WR_CURR_SHIFT; - u32 readers = val >> RD_COUNT_SHIFT; - u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); if (likely(delta == 0)) break; -- cgit v1.2.3 From f281233d3eba15fb225d21ae2e228fd4553d824a Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 16 Nov 2010 02:10:29 -0500 Subject: SCSI host lock push-down Move the mid-layer's ->queuecommand() invocation from being locked with the host lock to being unlocked to facilitate speeding up the critical path for drivers who don't need this lock taken anyway. The patch below presents a simple SCSI host lock push-down as an equivalent transformation. No locking or other behavior should change with this patch. All existing bugs and locking orders are preserved. Additionally, add one parameter to queuecommand, struct Scsi_Host * and remove one parameter from queuecommand, void (*done)(struct scsi_cmnd *) Scsi_Host* is a convenient pointer that most host drivers need anyway, and 'done' is redundant to struct scsi_cmnd->scsi_done. Minimal code disturbance was attempted with this change. Most drivers needed only two one-line modifications for their host lock push-down. Signed-off-by: Jeff Garzik Acked-by: James Bottomley Signed-off-by: Linus Torvalds --- arch/ia64/hp/sim/simscsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 3a078ad3aa44..331de723c676 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -202,7 +202,7 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) } static int -simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) +simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) { unsigned int target_id = sc->device->id; char fname[MAX_ROOT_LEN+16]; @@ -326,6 +326,8 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) return 0; } +static DEF_SCSI_QCMD(simscsi_queuecommand) + static int simscsi_host_reset (struct scsi_cmnd *sc) { -- cgit v1.2.3 From a6786fdad97d2ef4454f75a11a4f2a3bf67f2348 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 17 Nov 2010 06:51:08 +0000 Subject: sh: avoid to flush all cache in sys_cacheflush Calling sys_cacheflush with ICACHE we can direclty flush the icache without invoking the flush_cache_all function. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Carmelo Amoroso Signed-off-by: Paul Mundt --- arch/sh/kernel/sys_sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 81f58371613d..8c6a350df751 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -88,7 +88,7 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op) } if (op & CACHEFLUSH_I) - flush_cache_all(); + flush_icache_range(addr, addr+len); up_read(¤t->mm->mmap_sem); return 0; -- cgit v1.2.3 From 94ab115fd3e7f7e7f92f1645bbb6ba5414701b25 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 17 Nov 2010 06:51:52 +0000 Subject: sh: fix vsyscall compilation due to .eh_frame issue This patch fixes the following error obtained when compile the Kernel with the VSYSCALL support enabled: SYSCALL arch/sh/kernel/vsyscall/vsyscall-trapa.so sh4-linux/bin/ld: error in arch/sh/kernel/vsyscall/vsyscall-trapa.o(.eh_frame); no .eh_frame_hdr table will be created. AS arch/sh/kernel/vsyscall/vsyscall-syscall.o Disassembling the vsyscall-trapa.o code, with this fix applied, we get the __kernel_vsyscall symbol defined; otherwise it was missing. Disassembly of section .text: 00000000 <__kernel_vsyscall>: 0: 10 00 .word 0x0010 2: 00 00 .word 0x0000 [snip] 0000040 <__kernel_sigreturn>: 40: 05 93 mov.w 4e <__kernel_sigreturn+0xe>,r3 ! 77 42: 10 c3 trapa #16 [snip] 00000060 <__kernel_rt_sigreturn>: 60: 05 93 mov.w 6e <__kernel_rt_sigreturn+0xe>,r3 ! ad 62: 10 c3 trapa #16 [snip] Signed-off-by: Giuseppe Cavallaro Reviewed-by: Carmelo Amoroso Signed-off-by: Paul Mundt --- arch/sh/kernel/vsyscall/vsyscall-trapa.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S index 3b6eb34c43fa..3e70f851cdc6 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-trapa.S +++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S @@ -8,9 +8,9 @@ __kernel_vsyscall: * fill out .eh_frame -- PFM. */ .LEND_vsyscall: .size __kernel_vsyscall,.-.LSTART_vsyscall - .previous .section .eh_frame,"a",@progbits + .previous .LCIE: .ualong .LCIE_end - .LCIE_start .LCIE_start: -- cgit v1.2.3 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/blackfin/kernel/process.c | 1 - arch/frv/kernel/process.c | 1 - arch/h8300/kernel/process.c | 1 - arch/m68k/kernel/process.c | 1 - arch/m68knommu/kernel/process.c | 1 - arch/mn10300/kernel/process.c | 1 - arch/parisc/hpux/sys_hpux.c | 1 - arch/parisc/kernel/sys_parisc32.c | 1 - arch/powerpc/kernel/sys_ppc32.c | 1 - arch/s390/kernel/compat_linux.c | 1 - arch/sparc/kernel/leon_smp.c | 1 - arch/sparc/kernel/sys_sparc32.c | 1 - arch/sparc/kernel/sys_sparc_32.c | 1 - arch/sparc/kernel/unaligned_32.c | 1 - arch/sparc/kernel/windows.c | 1 - arch/tile/kernel/compat.c | 1 - arch/tile/kernel/compat_signal.c | 1 - arch/tile/kernel/signal.c | 1 - arch/tile/kernel/smpboot.c | 1 - arch/tile/kernel/sys.c | 1 - arch/tile/mm/fault.c | 1 - arch/tile/mm/hugetlbpage.c | 1 - arch/um/kernel/exec.c | 1 - arch/x86/ia32/sys_ia32.c | 1 - arch/x86/kernel/cpuid.c | 1 - arch/x86/kernel/msr.c | 1 - 26 files changed, 26 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index cd0c090ebc54..b407bc8ad918 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 2b63b0191f52..efad12071c2e 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 97478138e361..933bd388efb2 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 18732ab23292..c2a1fc23dd75 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6d3390590e5b..e2a63af5d517 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 0d0f8049a17b..e1b14a6ed544 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index ba430a03bc7a..30394081d9b6 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 9779ece2b070..88a0ad14a9c9 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b1b6043a56c4..4e5bf1edc0f2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 1e6449c79ab6..53acaa86dd94 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 7524689b03d2..16582d85368a 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e6375a750d9a..6db18c6927fb 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 675c9e11ada5..42b282fa6112 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 12b9f352595f..4491f4cb2695 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -16,7 +16,6 @@ #include #include #include -#include #include enum direction { diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index b351770cbdd6..3107381e576d 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -9,7 +9,6 @@ #include #include #include -#include #include diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 67617a05e602..dbc213adf5e1 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index fb64b99959d4..543d6a33aa26 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 687719d4abd1..757407e36696 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 74d62d098edf..b949edcec200 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index 7e764669a022..e2187d24a9b4 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index f295b4ac941d..dcebfc831cd6 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 24688b697a8d..201a582c4137 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 340268be00b5..09bd7b585726 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -5,7 +5,6 @@ #include "linux/stddef.h" #include "linux/fs.h" -#include "linux/smp_lock.h" #include "linux/ptrace.h" #include "linux/sched.h" #include "linux/slab.h" diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 849813f398e7..5852519b2d0f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 1b7b31ab7d86..212a6a42527c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7bf2dc4c8f70..12fcbe2c143e 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 10a6e67648d4b47769953bd24759ba9609bf00df Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 15 Nov 2010 08:07:35 -0600 Subject: kgdb,x86: fix regression in detach handling The fix from ba773f7c510c0b252145933926c636c439889207 (x86,kgdb: Fix hw breakpoint regression) was not entirely complete. The kgdb_remove_all_hw_break() function also needs to call the hw_break_release_slot() or else a breakpoint can get activated again after the debugger has detached. The kgdb test suite exposes the behavior in the form of either a hang or repetitive failure. The kernel config that exposes the problem contains all of the following: CONFIG_DEBUG_RODATA=y CONFIG_KGDB_TESTS=y CONFIG_KGDB_TESTS_ON_BOOT=y CONFIG_KGDB_TESTS_BOOT_STRING="V1F100" Reported-by: Frederic Weisbecker Signed-off-by: Jason Wessel Tested-by: Frederic Weisbecker --- arch/x86/kernel/kgdb.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ec592caac4b4..cd21b654dec6 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -315,14 +315,18 @@ static void kgdb_remove_all_hw_break(void) if (!breakinfo[i].enabled) continue; bp = *per_cpu_ptr(breakinfo[i].pev, cpu); - if (bp->attr.disabled == 1) + if (!bp->attr.disabled) { + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; continue; + } if (dbg_is_early) early_dr7 &= ~encode_dr7(i, breakinfo[i].len, breakinfo[i].type); - else - arch_uninstall_hw_breakpoint(bp); - bp->attr.disabled = 1; + else if (hw_break_release_slot(i)) + printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n", + breakinfo[i].addr); + breakinfo[i].enabled = 0; } } -- cgit v1.2.3 From e3839ed8e89e79202c0402ac46965c0686897890 Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Tue, 16 Nov 2010 16:02:00 -0600 Subject: kgdb,ppc: Fix regression in evr register handling Commit ff10b88b5a05c8f1646dd15fb9f6093c1384ff6d (kgdb,ppc: Individual register get/set for ppc) introduced a problem where memcpy was used incorrectly to read and write the evr registers with a kernel that has: CONFIG_FSL_BOOKE=y CONFIG_SPE=y CONFIG_KGDB=y This patch also fixes the following compilation problems: arch/powerpc/kernel/kgdb.c: In function 'dbg_get_reg': arch/powerpc/kernel/kgdb.c:341: error: passing argument 2 of 'memcpy' makes pointer from integer without a cast arch/powerpc/kernel/kgdb.c: In function 'dbg_set_reg': arch/powerpc/kernel/kgdb.c:366: error: passing argument 1 of 'memcpy' makes pointer from integer without a cast [jason.wessel@windriver.com: Remove void * casts and fix patch header] Reported-by: Milton Miller Signed-off-by: Dongdong Deng Acked-by: Kumar Gala Signed-off-by: Jason Wessel CC: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a9db64f3f04..42850ee00ada 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -337,7 +337,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) /* FP registers 32 -> 63 */ #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) if (current) - memcpy(mem, current->thread.evr[regno-32], + memcpy(mem, ¤t->thread.evr[regno-32], dbg_reg_def[regno].size); #else /* fp registers not used by kernel, leave zero */ @@ -362,7 +362,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) - memcpy(current->thread.evr[regno-32], mem, + memcpy(¤t->thread.evr[regno-32], mem, dbg_reg_def[regno].size); #else /* fp registers not used by kernel, leave zero */ -- cgit v1.2.3 From 0a77fe4c188e25917799f2356d4aa5e6d80c39a2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 19 Oct 2010 18:48:35 +0200 Subject: KVM: Correct ordering of ldt reload wrt fs/gs reload If fs or gs refer to the ldt, they must be reloaded after the ldt. Reorder the code to that effect. Userspace code that uses the ldt with kvm is nonexistent, so this doesn't fix a user-visible bug. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 82e144a4e514..1ca12298ffc7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3395,6 +3395,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; load_host_msrs(vcpu); + kvm_load_ldt(ldt_selector); loadsegment(fs, fs_selector); #ifdef CONFIG_X86_64 load_gs_index(gs_selector); @@ -3402,7 +3403,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #else loadsegment(gs, gs_selector); #endif - kvm_load_ldt(ldt_selector); reload_tss(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8da0e45ff7c9..6fe7df75bfd4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -839,8 +839,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; - if (vmx->host_state.fs_reload_needed) - loadsegment(fs, vmx->host_state.fs_sel); if (vmx->host_state.gs_ldt_reload_needed) { kvm_load_ldt(vmx->host_state.ldt_sel); #ifdef CONFIG_X86_64 @@ -850,6 +848,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) loadsegment(gs, vmx->host_state.gs_sel); #endif } + if (vmx->host_state.fs_reload_needed) + loadsegment(fs, vmx->host_state.fs_sel); reload_tss(); #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) { -- cgit v1.2.3 From c8770e7ba63bb5dd8fe5f9d251275a8fa717fb78 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 11 Nov 2010 12:37:26 +0200 Subject: KVM: VMX: Fix host userspace gsbase corruption We now use load_gs_index() to load gs safely; unfortunately this also changes MSR_KERNEL_GS_BASE, which we managed separately. This resulted in confusion and breakage running 32-bit host userspace on a 64-bit kernel. Fix by - saving guest MSR_KERNEL_GS_BASE before we we reload the host's gs - doing the host save/load unconditionally, instead of only when in guest long mode Things can be cleaned up further, but this is the minmal fix for now. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6fe7df75bfd4..ff21fdda0c53 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #endif #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) { - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - } #endif for (i = 0; i < vmx->save_nmsrs; ++i) kvm_set_shared_msr(vmx->guest_msrs[i].index, @@ -839,11 +838,14 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; +#ifdef CONFIG_X86_64 + if (is_long_mode(&vmx->vcpu)) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#endif if (vmx->host_state.gs_ldt_reload_needed) { kvm_load_ldt(vmx->host_state.ldt_sel); #ifdef CONFIG_X86_64 load_gs_index(vmx->host_state.gs_sel); - wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); #else loadsegment(gs, vmx->host_state.gs_sel); #endif @@ -852,10 +854,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) loadsegment(fs, vmx->host_state.fs_sel); reload_tss(); #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) { - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); - } + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif if (current_thread_info()->status & TS_USEDFPU) clts(); -- cgit v1.2.3 From acbfd58e8a1f24ef644a90a90825c3f435b990f5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Nov 2010 14:39:24 +1100 Subject: powerpc: Fix div64 in bootloader The code is missing a fix that went into the main kernel variant (we should try to share that code again at some stage) Reported-by: Albert Cahalan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/boot/div64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S index 722f360a32a9..d271ab542673 100644 --- a/arch/powerpc/boot/div64.S +++ b/arch/powerpc/boot/div64.S @@ -33,9 +33,10 @@ __div64_32: cntlzw r0,r5 # we are shifting the dividend right li r10,-1 # to make it < 2^32, and shifting srw r10,r10,r0 # the divisor right the same amount, - add r9,r4,r10 # rounding up (so the estimate cannot + addc r9,r4,r10 # rounding up (so the estimate cannot andc r11,r6,r10 # ever be too large, only too small) andc r9,r9,r10 + addze r9,r9 or r11,r5,r11 rotlw r9,r9,r0 rotlw r11,r11,r0 -- cgit v1.2.3 From 01cf6fe8553b0ac649f3323d8da69b51cad8c468 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Thu, 14 Oct 2010 14:48:52 +0000 Subject: powerpc/pseries: Don't override CONFIG_PPC_PSERIES_DEBUG EEH and pci_dlpar #undef DEBUG, but I think they were added before the ability to control this from Kconfig. It's really annoying to only get some of the debug messages from these files. Leave the lpar.c #undef alone as it produces so much output as to make the kernel unusable. Update the Kconfig text to indicate this particular quirk :) Signed-off-by: Nishanth Aravamudan Acked-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Kconfig | 6 ++++++ arch/powerpc/platforms/pseries/eeh.c | 2 -- arch/powerpc/platforms/pseries/pci_dlpar.c | 2 -- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index c667f0f02c34..3139814f6439 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -47,6 +47,12 @@ config LPARCFG config PPC_PSERIES_DEBUG depends on PPC_PSERIES && PPC_EARLY_DEBUG bool "Enable extra debug logging in platforms/pseries" + help + Say Y here if you want the pseries core to produce a bunch of + debug messages to the system log. Select this if you are having a + problem with the pseries core and want to see more of what is + going on. This does not enable debugging in lpar.c, which must + be manually done due to its verbosity. default y config PPC_SMLPAR diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 34b7dc12e731..17a11c82e6f8 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -21,8 +21,6 @@ * Please address comments and feedback to Linas Vepstas */ -#undef DEBUG - #include #include #include diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4b7a062dee15..5fcc92a12d3e 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -25,8 +25,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#undef DEBUG - #include #include #include -- cgit v1.2.3 From 4a89261b02d421cc1bcadaaebb90bb7919db0854 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 10 Nov 2010 12:29:49 +0000 Subject: powerpc/mm: Fix build error in setup_initial_memory_limit arch/powerpc/mm/tlb_nohash.c: In function 'setup_initial_memory_limit': arch/powerpc/mm/tlb_nohash.c:588:29: error: 'ppc64_memblock_base' undeclared (first use in this function) arch/powerpc/mm/tlb_nohash.c:588:29: note: each undeclared identifier is reported only once for each function it appears in Due to a copy/paste typo with the following commit: commit cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Author: Benjamin Herrenschmidt Date: Tue Jul 6 15:39:02 2010 -0700 memblock: Remove rmo_size, burry it in arch/powerpc where it belongs Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/tlb_nohash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 36c0c449a899..2a030d89bbc6 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -585,6 +585,6 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); /* Finally limit subsequent allocations */ - memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); + memblock_set_current_limit(first_memblock_base + ppc64_rma_size); } #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 234a71a7d64832e4aa8d9b5c80133480b86f6de3 Mon Sep 17 00:00:00 2001 From: kerstin jonsson Date: Fri, 22 Oct 2010 00:17:55 +0000 Subject: powerpc: Set CONFIG_32BIT on ppc32 commit ffe8018c3424892c9590048fc36caa6c3e0c8a76 of the -mm tree fixes the initramfs size calculation for e.g. s390 but breaks it for 32bit architectures which do not define CONFIG_32BIT. This patch fix the problem for PPC32 which will elsewise end up with a __initramfs_size of 0. Signed-off-by: Kerstin Jonsson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b6447190e1a2..e625e9e034ae 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -4,6 +4,10 @@ config PPC32 bool default y if !PPC64 +config 32BIT + bool + default y if PPC32 + config 64BIT bool default y if PPC64 -- cgit v1.2.3 From 1c2c25c78740b2796c7c06640784cb6732fa4907 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 17 Nov 2010 16:32:59 +0000 Subject: powerpc: Fix call to subpage_protection() In: powerpc/mm: Fix pgtable cache cleanup with CONFIG_PPC_SUBPAGE_PROT commit d28513bc7f675d28b479db666d572e078ecf182d Author: David Gibson subpage_protection() was changed to to take an mm rather a pgdir but it didn't change calling site in hashpage_preload(). The change wasn't noticed at compile time since hashpage_preload() used a void* as the parameter to subpage_protection(). This is obviously wrong and can trigger the following crash when CONFIG_SLAB, CONFIG_DEBUG_SLAB, CONFIG_PPC_64K_PAGES CONFIG_PPC_SUBPAGE_PROT are enabled. Freeing unused kernel memory: 704k freed Unable to handle kernel paging request for data at address 0x6b6b6b6b6b6c49b7 Faulting instruction address: 0xc0000000000410f4 cpu 0x2: Vector: 300 (Data Access) at [c00000004233f590] pc: c0000000000410f4: .hash_preload+0x258/0x338 lr: c000000000041054: .hash_preload+0x1b8/0x338 sp: c00000004233f810 msr: 8000000000009032 dar: 6b6b6b6b6b6c49b7 dsisr: 40000000 current = 0xc00000007e2c0070 paca = 0xc000000007fe0500 pid = 1, comm = init enter ? for help [c00000004233f810] c000000000041020 .hash_preload+0x184/0x338 (unreliable) [c00000004233f8f0] c00000000003ed98 .update_mmu_cache+0xb0/0xd0 [c00000004233f990] c000000000157754 .__do_fault+0x48c/0x5dc [c00000004233faa0] c000000000158fd0 .handle_mm_fault+0x508/0xa8c [c00000004233fb90] c0000000006acdd4 .do_page_fault+0x428/0x6ac [c00000004233fe30] c000000000005260 handle_page_fault+0x20/0x74 Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 83f534d862db..5e9584405c45 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1123,7 +1123,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, else #endif /* CONFIG_PPC_HAS_HASH_64K */ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(pgdir, ea)); + subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do -- cgit v1.2.3 From 82ae5eaffad40a6d4738e8a57e48dd0d903a9ef0 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 17 Nov 2010 07:20:32 +0000 Subject: powerpc/mm: Fix module instruction tlb fault handling on Book-E 64 We were seeing oops like the following when we did an rmmod on a module: Unable to handle kernel paging request for instruction fetch Faulting instruction address: 0x8000000000008010 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2 P5020 DS last sysfs file: /sys/devices/qman-portals.2/qman-pool.9/uevent Modules linked in: qman_tester(-) NIP: 8000000000008010 LR: c000000000074858 CTR: 8000000000008010 REGS: c00000002e29bab0 TRAP: 0400 Not tainted (2.6.34.6-00744-g2d21f14) MSR: 0000000080029000 CR: 24000448 XER: 00000000 TASK = c00000007a8be600[4987] 'rmmod' THREAD: c00000002e298000 CPU: 1 GPR00: 8000000000008010 c00000002e29bd30 8000000000012798 c00000000035fb28 GPR04: 0000000000000002 0000000000000002 0000000024022428 c000000000009108 GPR08: fffffffffffffffe 800000000000a618 c0000000003c13c8 0000000000000000 GPR12: 0000000022000444 c00000000fffed00 0000000000000000 0000000000000000 GPR16: 00000000100c0000 0000000000000000 00000000100dabc8 0000000010099688 GPR20: 0000000000000000 00000000100cfc28 0000000000000000 0000000010011a44 GPR24: 00000000100017b2 0000000000000000 0000000000000000 0000000000000880 GPR28: c00000000035fb28 800000000000a7b8 c000000000376d80 c0000000003cce50 NIP [8000000000008010] .test_exit+0x0/0x10 [qman_tester] LR [c000000000074858] .SyS_delete_module+0x1f8/0x2f0 Call Trace: [c00000002e29bd30] [c0000000000748b4] .SyS_delete_module+0x254/0x2f0 (unreliable) [c00000002e29be30] [c000000000000580] syscall_exit+0x0/0x2c Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 38600000 4e800020 60000000 60000000 <4e800020> 60000000 60000000 60000000 ---[ end trace 4f57124939a84dc8 ]--- This appears to be due to checking the wrong permission bits in the instruction_tlb_miss handling if the address that faulted was in vmalloc space. We need to look at the supervisor execute (_PAGE_BAP_SX) bit and not the user bit (_PAGE_BAP_UX/_PAGE_EXEC). Also removed a branch level since it did not appear to be used. Reported-by: Jeffrey Ladouceur Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/tlb_low_64e.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 8b04c54e596f..8526bd9d2aa3 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -138,8 +138,11 @@ cmpldi cr0,r15,0 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ beq normal_tlb_miss + + li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ + oris r11,r11,_PAGE_ACCESSED@h /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 + mfspr r10,SPRN_MAS1 cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 -- cgit v1.2.3 From 0f6b77ca12bea571e0a97b0588f62aa5f6012d61 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Tue, 16 Nov 2010 07:55:16 +0000 Subject: powerpc: Update a BKL related comment The commit 5e3d20a remove bkl from startup code so setup_arch() it isn't called with bkl held anymore. Update the comment on top of that function. Fix also a typo. This work was supported by a hardware donation from the CE Linux Forum. Signed-off-by: Alessio Igor Bogani Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a178b0ebcdf..ce6f61c6f871 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -497,9 +497,8 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. + * Called into from start_kernel this initializes bootmem, which is used + * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) { -- cgit v1.2.3 From d53e4307c2f3856167407a1d9b8f8fa001286066 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 17 Nov 2010 06:50:17 +0000 Subject: sh: Use GCC __builtin_prefetch() to implement prefetch(). GCC's __builtin_prefetch() was introduced a long time ago, all supported GCC versions have it. So this patch is to use it for implementing the prefetch on SH2A and SH4. The current prefetch implementation is almost equivalent with __builtin_prefetch. The third parameter in the __builtin_prefetch is the locality that it's not supported on SH architectures. It has been set to three and it should be verified if it's suitable for SH2A as well. I didn't test on this architecture. The builtin usage should be more efficient that an __asm__ because less barriers, and because the compiler doesn't see the inst as a "black box" allowing better code generation. This has been already done on other architectures (see the commit: 0453fb3c528c5eb3483441a466b24a4cb409eec5). Many thanks to Christian Bruel for his support on evaluate the impact of the gcc built-in on SH4 arch. No regressions found while testing with LMbench on STLinux targets. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/include/asm/processor_32.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 46d5179c9f49..e3c73cdd8c90 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -199,10 +199,13 @@ extern unsigned long get_wchan(struct task_struct *p); #define ARCH_HAS_PREFETCHW static inline void prefetch(void *x) { - __asm__ __volatile__ ("pref @%0\n\t" : : "r" (x) : "memory"); + __builtin_prefetch(x, 0, 3); } -#define prefetchw(x) prefetch(x) +static inline void prefetchw(void *x) +{ + __builtin_prefetch(x, 1, 3); +} #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 4d048435e9864998f6a6ad16422393d42322716d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 17 Nov 2010 11:44:00 +0000 Subject: ARM: mach-shmobile: sh7372 USB0/IIC1 MSTP fix Fix a MSTP assignment problem in the sh7372 clock framework code. The USB drivers should attach to MSTP322 not MSTP33 where IIC1 is located. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..d3313a95dbfc 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -647,8 +647,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ - CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP323]), /* USB0 */ - CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP323]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP322]), /* USB0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */ -- cgit v1.2.3 From 0e2af2a9abf94b408ff70679b692a8644fed4aab Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 12 Nov 2010 09:50:54 -0500 Subject: x86, hw_nmi: Move backtrace_mask declaration under ARCH_HAS_NMI_WATCHDOG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit backtrace_mask has been used under the code context of ARCH_HAS_NMI_WATCHDOG. So put it into that context. We were warned by the following warning: arch/x86/kernel/apic/hw_nmi.c:21: warning: ‘backtrace_mask’ defined but not used Signed-off-by: Rakib Mullick Signed-off-by: Don Zickus LKML-Reference: <1289573455-3410-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..62f6e1e55b90 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,15 +17,16 @@ #include #include -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } #ifdef ARCH_HAS_NMI_WATCHDOG + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + void arch_trigger_all_cpu_backtrace(void) { int i; -- cgit v1.2.3 From 96e612ffc301372d3a3b94e2cb5d1e0c1c207dd1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 16 Nov 2010 13:45:16 +0900 Subject: x86, asm: Fix binutils 2.15 build failure Add parentheses around one pushl_cfi argument. Commit df5d1874 "x86: Use {push,pop}{l,q}_cfi in more places" caused GNU assembler 2.15 (Debian Sarge) to fail. It is still failing as of commit 07bd8516 "x86, asm: Restore parentheses around one pushl_cfi argument". This patch solves build failure with GNU assembler 2.15. Signed-off-by: Tetsuo Handa Acked-by: Jan Beulich Cc: heukelum@fastmail.fm Cc: hpa@linux.intel.com LKML-Reference: <201011160445.oAG4jGif079860@www262.sakura.ne.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 59e175e89599..591e60104278 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -395,7 +395,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax -- cgit v1.2.3 From 9223081f54e3dc5045fe41a475165d9003c9a779 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 13 Nov 2010 10:52:09 -0800 Subject: x86: Use online node real index in calulate_tbl_offset() Found a NUMA system that doesn't have RAM installed at the first socket which hangs while executing init scripts. bisected it to: | commit 932967202182743c01a2eee4bdfa2c42697bc586 | Author: Shaohua Li | Date: Wed Oct 20 11:07:03 2010 +0800 | | x86: Spread tlb flush vector between nodes It turns out when first socket is not online it could have cpus on node1 tlb_offset set to bigger than NUM_INVALIDATE_TLB_VECTORS. That could affect systems like 4 sockets, but socket 2 doesn't have installed, sockets 3 will get too big tlb_offset. Need to use real online node idx. Signed-off-by: Yinghai Lu Acked-by: Shaohua Li Cc: Linus Torvalds LKML-Reference: <4CDEDE59.40603@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 12cdbb17ad18..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } -- cgit v1.2.3 From 8191c9f69202d4dbc66063cb92059b8a58640d34 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 16 Nov 2010 16:23:52 -0600 Subject: x86: UV: Address interrupt/IO port operation conflict This patch for SGI UV systems addresses a problem whereby interrupt transactions being looped back from a local IOH, through the hub to a local CPU can (erroneously) conflict with IO port operations and other transactions. To workaound this we set a high bit in the APIC IDs used for interrupts. This bit appears to be ignored by the sockets, but it avoids the conflict in the hub. Signed-off-by: Dimitri Sivanich LKML-Reference: <20101116222352.GA8155@sgi.com> Signed-off-by: Ingo Molnar ___ arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) --- arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index e969f691cbfd..a501741c2335 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -199,6 +199,8 @@ union uvh_apicid { #define UVH_APICID 0x002D0E00L #define UV_APIC_PNODE_SHIFT 6 +#define UV_APICID_HIBIT_MASK 0xffff0000 + /* Local Bus from cpu's perspective */ #define LOCAL_BUS_BASE 0x1c00000 #define LOCAL_BUS_SIZE (4 * 1024 * 1024) @@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } } +extern unsigned int uv_apicid_hibits; static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) { + apicid |= uv_apicid_hibits; return (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 6d90adf4428a..20cafeac7455 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -753,6 +753,23 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; +/* ========================================================================= */ +/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 + +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uvh_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uvh_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea175..c1c52c341f40 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr; static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); +unsigned int uv_apicid_hibits; +EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); static inline bool is_GRU_range(u64 start, u64 end) @@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void) uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; } +/* + * Add an extra bit as dictated by bios to the destination apicid of + * interrupts potentially passing through the UV HUB. This prevents + * a deadlock between interrupts and IO port operations. + */ +static void __init uv_set_apicid_hibit(void) +{ + union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | + UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); + apicid_mask.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; +} + static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int nodeid; @@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) __get_cpu_var(x2apic_extra_bits) = nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; + uv_set_apicid_hibit(); return 1; } } @@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri int pnode; pnode = uv_apicid_to_pnode(phys_apicid); + phys_apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | @@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) int cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; else return BAD_APICID; } @@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a318194002b5..ba9caa808a9c 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector) * the below initialization can't be in firmware because the * messaging IRQ will be determined by the OS */ - apicid = uvhub_to_first_apicid(uvhub); + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, ((apicid << 32) | vector)); } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..9daf5d1af9f1 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu) apicid = cpu_physical_id(cpu); pnode = uv_apicid_to_pnode(apicid); + apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); @@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode) static int uv_setup_intr(int cpu, u64 expires) { u64 val; + unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; int pnode = uv_cpu_to_pnode(cpu); uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, @@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_EVENT_OCCURRED0_RTC1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); + ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); /* Set configuration */ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); -- cgit v1.2.3 From de31ec8a31046111befd16a7083e3bdda2ff42cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Nov 2010 19:16:55 +0900 Subject: x86/kprobes: Prevent kprobes to probe on save_args() Prevent kprobes to probe on save_args() since this function will be called from breakpoint exception handler. That will cause infinit loop on breakpoint handling. Signed-off-by: Masami Hiramatsu Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Ananth N Mavinakayanahalli LKML-Reference: <20101118101655.2779.2816.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe2690d71c0c..e3ba417e8697 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64) .endm /* save partial stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_args) XCPT_FRAME cld @@ -334,6 +335,7 @@ ENTRY(save_args) ret CFI_ENDPROC END(save_args) + .popsection ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 -- cgit v1.2.3 From 37db6c8f1d0c4b8f01dc049f3a893b725288660f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Nov 2010 08:25:08 +0000 Subject: x86-64: Fix and clean up AMD Fam10 MMCONF enabling Candidate memory ranges were not calculated properly (start addresses got needlessly rounded down, and end addresses didn't get rounded up at all), address comparison for secondary CPUs was done on only part of the address, and disabled status wasn't tracked properly. Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Acked-by: Andreas Herrmann LKML-Reference: <4CE24DF40200007800022737@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/mmconf-fam10h_64.c | 64 ++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ea3dc487047..6b89f5e86021 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,7 +128,7 @@ #define FAM10H_MMIO_CONF_ENABLE (1<<0) #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 6da143c2a6b8..ac861b8348e2 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -25,7 +25,6 @@ struct pci_hostbridge_probe { }; static u64 __cpuinitdata fam10h_pci_mmconf_base; -static int __cpuinitdata fam10h_pci_mmconf_base_status; static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, @@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2) return start1 - start2; } -/*[47:0] */ -/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) +#define MMCONF_MASK (~(MMCONF_UNIT - 1)) +#define MMCONF_SIZE (MMCONF_UNIT << 8) +/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) -#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) static void __cpuinit get_fam10h_pci_mmconf_base(void) { int i; @@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) struct range range[8]; /* only try to get setting from BSP */ - /* -1 or 1 */ - if (fam10h_pci_mmconf_base_status) + if (fam10h_pci_mmconf_base) return; if (!early_pci_allowed()) - goto fail; + return; found = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { @@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) } if (!found) - goto fail; + return; /* SYS_CFG */ address = MSR_K8_SYSCFG; @@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) /* TOP_MEM2 is not enabled? */ if (!(val & (1<<21))) { - tom2 = 0; + tom2 = 1ULL << 32; } else { /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - tom2 = val & (0xffffULL<<32); + tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); } if (base <= tom2) - base = tom2 + (1ULL<<32); + base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; /* * need to check if the range is in the high mmio range that is @@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (!(reg & 3)) continue; - start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); - end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ - if (!end) + if (end < tom2) continue; range[hi_mmio_num].start = start; @@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (range[hi_mmio_num - 1].end < base) goto out; - if (range[0].start > base) + if (range[0].start > base + MMCONF_SIZE) goto out; /* need to find one window */ - base = range[0].start - (1ULL << 32); + base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; if ((base > tom2) && BASE_VALID(base)) goto out; - base = range[hi_mmio_num - 1].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) + base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; + if (BASE_VALID(base)) goto out; /* need to find window between ranges */ - if (hi_mmio_num > 1) - for (i = 0; i < hi_mmio_num - 1; i++) { - if (range[i + 1].start > (range[i].end + (1ULL << 32))) { - base = range[i].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) - goto out; - } + for (i = 1; i < hi_mmio_num; i++) { + base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; + val = range[i].start & MMCONF_MASK; + if (val >= base + MMCONF_SIZE && BASE_VALID(base)) + goto out; } - -fail: - fam10h_pci_mmconf_base_status = -1; return; + out: fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; } void __cpuinit fam10h_check_enable_mmcfg(void) @@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) /* only trust the one handle 256 buses, if acpi=off */ if (!acpi_pci_disabled || busnbits >= 8) { - u64 base; - base = val & (0xffffULL << 32); - if (fam10h_pci_mmconf_base_status <= 0) { + u64 base = val & MMCONF_MASK; + + if (!fam10h_pci_mmconf_base) { fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; return; } else if (fam10h_pci_mmconf_base == base) return; @@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) * with 256 buses */ get_fam10h_pci_mmconf_base(); - if (fam10h_pci_mmconf_base_status <= 0) + if (!fam10h_pci_mmconf_base) { + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; return; + } printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); val &= ~((FAM10H_MMIO_CONF_BASE_MASK< Date: Fri, 19 Nov 2010 23:27:06 -0800 Subject: xen: re-enable boot-time ballooning Now that the balloon driver doesn't stumble over non-RAM pages, we can enable the extra space for ballooning. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..630fb53c95f3 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -248,8 +248,7 @@ char * __init xen_memory_setup(void) else extra_pages = 0; - if (!xen_initial_domain()) - xen_add_extra_mem(extra_pages); + xen_add_extra_mem(extra_pages); return "Xen"; } -- cgit v1.2.3 From c0ba10b512eb2e2a3888b6e6cc0e089f5e7a191b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 21 Nov 2010 14:42:47 +0000 Subject: ARM: improve compiler's ability to optimize page tables Allow the compiler to better optimize the page table walking code by avoiding over-complex pmd_addr_end() calculations. These calculations prevent the compiler spotting that we'll never iterate over the PMD table, causing it to create double nested loops where a single loop will do. Signed-off-by: Russell King --- arch/arm/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index b155414192da..53d1d5deb111 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -374,6 +374,9 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) +/* we don't need complex calculations here as the pmd is folded into the pgd */ +#define pmd_addr_end(addr,end) (end) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. -- cgit v1.2.3 From ffc43ef31395063d32e72de0a9c3d061347c3b94 Mon Sep 17 00:00:00 2001 From: Per Fransson Date: Mon, 15 Nov 2010 14:31:17 +0100 Subject: ARM: 6473/1: Small update to ux500 specific L2 cache code This change updates the ux500 specific outer cache code to use the new *_relaxed() I/O accessors. Signed-off-by: Per Fransson Signed-off-by: Russell King --- arch/arm/mach-ux500/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 73fb1a551ec6..608a1372b172 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -75,14 +75,14 @@ void __init ux500_init_irq(void) static inline void ux500_cache_wait(void __iomem *reg, unsigned long mask) { /* wait for the operation to complete */ - while (readl(reg) & mask) + while (readl_relaxed(reg) & mask) ; } static inline void ux500_cache_sync(void) { void __iomem *base = __io_address(UX500_L2CC_BASE); - writel(0, base + L2X0_CACHE_SYNC); + writel_relaxed(0, base + L2X0_CACHE_SYNC); ux500_cache_wait(base + L2X0_CACHE_SYNC, 1); } @@ -107,7 +107,7 @@ static void ux500_l2x0_inv_all(void) uint32_t l2x0_way_mask = (1<<16) - 1; /* Bitmask of active ways */ /* invalidate all ways */ - writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); ux500_cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); ux500_cache_sync(); } -- cgit v1.2.3 From 963fec4e0f4993486ee91edfafba0c972834d8c3 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Wed, 17 Nov 2010 10:08:49 +0100 Subject: ARM: 6484/1: fix compile warning in mm/init.c Commit 7c63984b86 (ARM: do not define VMALLOC_END relative to PAGE_OFFSET) changed VMALLOC_END to be an explicit value. Before this, it was relative to PAGE_OFFSET and therefore converted to unsigned long as PAGE_OFFSET is an unsigned long. This introduced the following build warning. Fix this by changing the explicit defines of VMALLOC_END to be unsigned long. CC arch/arm/mm/init.o arch/arm/mm/init.c: In function 'mem_init': arch/arm/mm/init.c:606: warning: format '%08lx' expects type 'long unsigned int', but argument 12 has type 'unsigned int' Signed-off-by: Anand Gadiyar Acked-by: Uwe Kleine-K Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mach-aaec2000/include/mach/vmalloc.h | 2 +- arch/arm/mach-bcmring/include/mach/vmalloc.h | 2 +- arch/arm/mach-clps711x/include/mach/vmalloc.h | 2 +- arch/arm/mach-ebsa110/include/mach/vmalloc.h | 2 +- arch/arm/mach-footbridge/include/mach/vmalloc.h | 2 +- arch/arm/mach-h720x/include/mach/vmalloc.h | 2 +- arch/arm/mach-integrator/include/mach/vmalloc.h | 2 +- arch/arm/mach-msm/include/mach/vmalloc.h | 2 +- arch/arm/mach-netx/include/mach/vmalloc.h | 2 +- arch/arm/mach-omap1/include/mach/vmalloc.h | 2 +- arch/arm/mach-omap2/include/mach/vmalloc.h | 2 +- arch/arm/mach-pnx4008/include/mach/vmalloc.h | 2 +- arch/arm/mach-rpc/include/mach/vmalloc.h | 2 +- arch/arm/mach-shark/include/mach/vmalloc.h | 2 +- arch/arm/mach-versatile/include/mach/vmalloc.h | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-aaec2000/include/mach/vmalloc.h b/arch/arm/mach-aaec2000/include/mach/vmalloc.h index cff4e0a996ce..a6299e8321bd 100644 --- a/arch/arm/mach-aaec2000/include/mach/vmalloc.h +++ b/arch/arm/mach-aaec2000/include/mach/vmalloc.h @@ -11,6 +11,6 @@ #ifndef __ASM_ARCH_VMALLOC_H #define __ASM_ARCH_VMALLOC_H -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL #endif /* __ASM_ARCH_VMALLOC_H */ diff --git a/arch/arm/mach-bcmring/include/mach/vmalloc.h b/arch/arm/mach-bcmring/include/mach/vmalloc.h index 3db3a09fd398..7397bd7817d9 100644 --- a/arch/arm/mach-bcmring/include/mach/vmalloc.h +++ b/arch/arm/mach-bcmring/include/mach/vmalloc.h @@ -22,4 +22,4 @@ * 0xe0000000 to 0xefffffff. This gives us 256 MB of vm space and handles * larger physical memory designs better. */ -#define VMALLOC_END 0xf0000000 +#define VMALLOC_END 0xf0000000UL diff --git a/arch/arm/mach-clps711x/include/mach/vmalloc.h b/arch/arm/mach-clps711x/include/mach/vmalloc.h index 30b3a287ed88..467b96137e47 100644 --- a/arch/arm/mach-clps711x/include/mach/vmalloc.h +++ b/arch/arm/mach-clps711x/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL diff --git a/arch/arm/mach-ebsa110/include/mach/vmalloc.h b/arch/arm/mach-ebsa110/include/mach/vmalloc.h index 60bde56fba4c..ea141b7a3e03 100644 --- a/arch/arm/mach-ebsa110/include/mach/vmalloc.h +++ b/arch/arm/mach-ebsa110/include/mach/vmalloc.h @@ -7,4 +7,4 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#define VMALLOC_END 0xdf000000 +#define VMALLOC_END 0xdf000000UL diff --git a/arch/arm/mach-footbridge/include/mach/vmalloc.h b/arch/arm/mach-footbridge/include/mach/vmalloc.h index 0ffbb7c85e59..40ba78e5782b 100644 --- a/arch/arm/mach-footbridge/include/mach/vmalloc.h +++ b/arch/arm/mach-footbridge/include/mach/vmalloc.h @@ -7,4 +7,4 @@ */ -#define VMALLOC_END 0xf0000000 +#define VMALLOC_END 0xf0000000UL diff --git a/arch/arm/mach-h720x/include/mach/vmalloc.h b/arch/arm/mach-h720x/include/mach/vmalloc.h index a45915b88756..8520b4a4d4e6 100644 --- a/arch/arm/mach-h720x/include/mach/vmalloc.h +++ b/arch/arm/mach-h720x/include/mach/vmalloc.h @@ -5,6 +5,6 @@ #ifndef __ARCH_ARM_VMALLOC_H #define __ARCH_ARM_VMALLOC_H -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL #endif diff --git a/arch/arm/mach-integrator/include/mach/vmalloc.h b/arch/arm/mach-integrator/include/mach/vmalloc.h index e056e7cf5645..2f5a2bafb11f 100644 --- a/arch/arm/mach-integrator/include/mach/vmalloc.h +++ b/arch/arm/mach-integrator/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL diff --git a/arch/arm/mach-msm/include/mach/vmalloc.h b/arch/arm/mach-msm/include/mach/vmalloc.h index 31a32ad062dc..d138448eff16 100644 --- a/arch/arm/mach-msm/include/mach/vmalloc.h +++ b/arch/arm/mach-msm/include/mach/vmalloc.h @@ -16,7 +16,7 @@ #ifndef __ASM_ARCH_MSM_VMALLOC_H #define __ASM_ARCH_MSM_VMALLOC_H -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL #endif diff --git a/arch/arm/mach-netx/include/mach/vmalloc.h b/arch/arm/mach-netx/include/mach/vmalloc.h index 7cca3574308f..871f1ef7bff5 100644 --- a/arch/arm/mach-netx/include/mach/vmalloc.h +++ b/arch/arm/mach-netx/include/mach/vmalloc.h @@ -16,4 +16,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL diff --git a/arch/arm/mach-omap1/include/mach/vmalloc.h b/arch/arm/mach-omap1/include/mach/vmalloc.h index b001f67d695b..22ec4a479577 100644 --- a/arch/arm/mach-omap1/include/mach/vmalloc.h +++ b/arch/arm/mach-omap1/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd8000000 +#define VMALLOC_END 0xd8000000UL diff --git a/arch/arm/mach-omap2/include/mach/vmalloc.h b/arch/arm/mach-omap2/include/mach/vmalloc.h index 4da31e997efe..866319947760 100644 --- a/arch/arm/mach-omap2/include/mach/vmalloc.h +++ b/arch/arm/mach-omap2/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xf8000000 +#define VMALLOC_END 0xf8000000UL diff --git a/arch/arm/mach-pnx4008/include/mach/vmalloc.h b/arch/arm/mach-pnx4008/include/mach/vmalloc.h index 31b65ee07b0b..184913c71141 100644 --- a/arch/arm/mach-pnx4008/include/mach/vmalloc.h +++ b/arch/arm/mach-pnx4008/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. ;) */ -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL diff --git a/arch/arm/mach-rpc/include/mach/vmalloc.h b/arch/arm/mach-rpc/include/mach/vmalloc.h index 3bcd86fadb81..fb700228637a 100644 --- a/arch/arm/mach-rpc/include/mach/vmalloc.h +++ b/arch/arm/mach-rpc/include/mach/vmalloc.h @@ -7,4 +7,4 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#define VMALLOC_END 0xdc000000 +#define VMALLOC_END 0xdc000000UL diff --git a/arch/arm/mach-shark/include/mach/vmalloc.h b/arch/arm/mach-shark/include/mach/vmalloc.h index 8e845b6a7cb5..b10df988526d 100644 --- a/arch/arm/mach-shark/include/mach/vmalloc.h +++ b/arch/arm/mach-shark/include/mach/vmalloc.h @@ -1,4 +1,4 @@ /* * arch/arm/mach-shark/include/mach/vmalloc.h */ -#define VMALLOC_END 0xd0000000 +#define VMALLOC_END 0xd0000000UL diff --git a/arch/arm/mach-versatile/include/mach/vmalloc.h b/arch/arm/mach-versatile/include/mach/vmalloc.h index ebd8a2543d3b..7d8e069ad51b 100644 --- a/arch/arm/mach-versatile/include/mach/vmalloc.h +++ b/arch/arm/mach-versatile/include/mach/vmalloc.h @@ -18,4 +18,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd8000000 +#define VMALLOC_END 0xd8000000UL -- cgit v1.2.3 From 8fbb97a2bc9b8c91b5a4588ba80709e48dad3f16 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 19 Nov 2010 10:16:05 +0100 Subject: ARM: 6488/1: nomadik: prevent sched_clock() wraparound The current implementation of sched_clock() for the Nomadik family is based on the clock source that will wrap around without any compensation. Currently on the Ux500 after 1030 seconds. Utilize cnt32_to_63 to expand the sched_clock() counter to 63 bits and introduce a keepwarm() timer to assure that sched clock and this cnt32_to_63 is called atleast once every half period. When I print out the actual wrap-around time, and using a year (3600*24*365 seconds) as minumum wrap limit I get an actual wrap-around of: sched_clock: using 55 bits @ 8333125 Hz wrap in 416 days Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/plat-nomadik/timer.c | 89 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index aedf9c1d645e..63cdc6025bd7 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -3,6 +3,7 @@ * * Copyright (C) 2008 STMicroelectronics * Copyright (C) 2010 Alessandro Rubini + * Copyright (C) 2010 Linus Walleij for ST-Ericsson * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, as @@ -16,11 +17,13 @@ #include #include #include +#include +#include #include #include -void __iomem *mtu_base; /* ssigned by machine code */ +void __iomem *mtu_base; /* Assigned by machine code */ /* * Kernel assumes that sched_clock can be called early @@ -48,16 +51,82 @@ static struct clocksource nmdk_clksrc = { /* * Override the global weak sched_clock symbol with this * local implementation which uses the clocksource to get some - * better resolution when scheduling the kernel. We accept that - * this wraps around for now, since it is just a relative time - * stamp. (Inspired by OMAP implementation.) + * better resolution when scheduling the kernel. + * + * Because the hardware timer period may be quite short + * (32.3 secs on the 133 MHz MTU timer selection on ux500) + * and because cnt32_to_63() needs to be called at least once per + * half period to work properly, a kernel keepwarm() timer is set up + * to ensure this requirement is always met. + * + * Also the sched_clock timer will wrap around at some point, + * here we set it to run continously for a year. */ +#define SCHED_CLOCK_MIN_WRAP 3600*24*365 +static struct timer_list cnt32_to_63_keepwarm_timer; +static u32 sched_mult; +static u32 sched_shift; + unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(nmdk_clksrc.read( - &nmdk_clksrc), - nmdk_clksrc.mult, - nmdk_clksrc.shift); + u64 cycles; + + if (unlikely(!mtu_base)) + return 0; + + cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0))); + /* + * sched_mult is guaranteed to be even so will + * shift out bit 63 + */ + return (cycles * sched_mult) >> sched_shift; +} + +/* Just kick sched_clock every so often */ +static void cnt32_to_63_keepwarm(unsigned long data) +{ + mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); + (void) sched_clock(); +} + +/* + * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm + * once in half a 32bit timer wrap interval. + */ +static void __init nmdk_sched_clock_init(unsigned long rate) +{ + u32 v; + unsigned long delta; + u64 days; + + /* Find the apropriate mult and shift factors */ + clocks_calc_mult_shift(&sched_mult, &sched_shift, + rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP); + /* We need to multiply by an even number to get rid of bit 63 */ + if (sched_mult & 1) + sched_mult++; + + /* Let's see what we get, take max counter and scale it */ + days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift; + do_div(days, NSEC_PER_SEC); + do_div(days, (3600*24)); + + pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n", + (64 - sched_shift), rate, (unsigned long) days); + + /* + * Program a timer to kick us at half 32bit wraparound + * Formula: seconds per wrap = (2^32) / f + */ + v = 0xFFFFFFFFUL / rate; + /* We want half of the wrap time to keep cnt32_to_63 warm */ + v /= 2; + pr_debug("sched_clock: prescaled timer rate: %lu Hz, " + "initialize keepwarm timer every %d seconds\n", rate, v); + /* Convert seconds to jiffies */ + delta = msecs_to_jiffies(v*1000); + setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta); + mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta)); } /* Clockevent device: use one-shot mode */ @@ -161,13 +230,15 @@ void __init nmdk_timer_init(void) writel(0, mtu_base + MTU_BGLR(0)); writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(0)); - /* Now the scheduling clock is ready */ + /* Now the clock source is ready */ nmdk_clksrc.read = nmdk_read_timer; if (clocksource_register(&nmdk_clksrc)) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); + nmdk_sched_clock_init(rate); + /* Timer 1 is used for events */ clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE); -- cgit v1.2.3 From 1142b71d85894dcff1466dd6c871ea3c89e0352c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Nov 2010 13:18:31 +0100 Subject: ARM: 6489/1: thumb2: fix incorrect optimisation in usracc Commit 8b592783 added a Thumb-2 variant of usracc which, when it is called with \rept=2, calls usraccoff once with an offset of 0 and secondly with a hard-coded offset of 4 in order to avoid incrementing the pointer again. If \inc != 4 then we will store the data to the wrong offset from \ptr. Luckily, the only caller that passes \rept=2 to this function is __clear_user so we haven't been actively corrupting user data. This patch fixes usracc to pass \inc instead of #4 to usraccoff when it is called a second time. Cc: Reported-by: Tony Thompson Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 062b58c029ab..749bb6622404 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -238,7 +238,7 @@ @ Slightly optimised to avoid incrementing the pointer twice usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort .if \rept == 2 - usraccoff \instr, \reg, \ptr, \inc, 4, \cond, \abort + usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort .endif add\cond \ptr, #\rept * \inc -- cgit v1.2.3 From b0c4d4ee4e4f13fa3f017c00319a18c1ea31b7d7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 22 Nov 2010 12:00:59 +0000 Subject: ARM: avoid marking decompressor .stack section as having contents The .stack section doesn't contain any contents, and doesn't require initialization either. Rather than marking the output section with 'NOLOAD' but still having it exist in the object files, mark it with %nobits which avoids the assembler marking the section with 'CONTENTS'. Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 2 +- arch/arm/boot/compressed/vmlinux.lds.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6825c34646d4..9be21ba648cd 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1084,6 +1084,6 @@ memdump: mov r12, r0 reloc_end: .align - .section ".stack", "w" + .section ".stack", "aw", %nobits user_stack: .space 4096 user_stack_end: diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index d08168941bd6..366a924019ac 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in @@ -57,7 +57,7 @@ SECTIONS .bss : { *(.bss) } _end = .; - .stack (NOLOAD) : { *(.stack) } + .stack : { *(.stack) } .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } -- cgit v1.2.3 From ec35a69c467026437519bafcf325a7362e422db9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 16 Nov 2010 12:09:59 -0500 Subject: xen: set IO permission early (before early_cpu_init()) This patch is based off "xen dom0: Set up basic IO permissions for dom0." by Juan Quintela . On AMD machines when we boot the kernel as Domain 0 we get this nasty: mapping kernel into physical memory Xen: setup ISA identity maps about to get started... (XEN) traps.c:475:d0 Unhandled general protection fault fault/trap [#13] on VCPU 0 [ec=0000] (XEN) domain_crash_sync called from entry.S (XEN) Domain 0 (vcpu#0) crashed on cpu#0: (XEN) ----[ Xen-4.1-101116 x86_64 debug=y Not tainted ]---- (XEN) CPU: 0 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000282 EM: 1 CONTEXT: pv guest (XEN) rax: 000000008000c068 rbx: ffffffff8186c680 rcx: 0000000000000068 (XEN) rdx: 0000000000000cf8 rsi: 000000000000c000 rdi: 0000000000000000 (XEN) rbp: ffffffff81801e98 rsp: ffffffff81801e50 r8: ffffffff81801eac (XEN) r9: ffffffff81801ea8 r10: ffffffff81801eb4 r11: 00000000ffffffff (XEN) r12: ffffffff8186c694 r13: ffffffff81801f90 r14: ffffffffffffffff (XEN) r15: 0000000000000000 cr0: 000000008005003b cr4: 00000000000006f0 (XEN) cr3: 0000000221803000 cr2: 0000000000000000 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e02b cs: e033 (XEN) Guest stack trace from rsp=ffffffff81801e50: RIP points to read_pci_config() function. The issue is that we don't set IO permissions for the Linux kernel early enough. The call sequence used to be: xen_start_kernel() x86_init.oem.arch_setup = xen_setup_arch; setup_arch: - early_cpu_init - early_init_amd - read_pci_config - x86_init.oem.arch_setup [ xen_arch_setup ] - set IO permissions. We need to set the IO permissions earlier on, which this patch does. Acked-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 12 +++++++++++- arch/x86/xen/setup.c | 8 -------- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bd3554934613..7250bef7f49e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1095,6 +1095,8 @@ static void __init xen_setup_stackprotector(void) /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { + struct physdev_set_iopl set_iopl; + int rc; pgd_t *pgd; if (!xen_start_info) @@ -1209,10 +1211,18 @@ asmlinkage void __init xen_start_kernel(void) #else pv_info.kernel_rpl = 0; #endif - /* set the limit of our address space */ xen_reserve_top(); + /* We used to do this in xen_arch_setup, but that is too late on AMD + * were early_cpu_init (run before ->arch_setup()) calls early_amd_init + * which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 630fb53c95f3..38fdffaa71d3 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -336,9 +336,6 @@ void __cpuinit xen_enable_syscall(void) void __init xen_arch_setup(void) { - struct physdev_set_iopl set_iopl; - int rc; - xen_panic_handler_init(); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); @@ -355,11 +352,6 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - printk(KERN_INFO "physdev_op failed %d\n", rc); - #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); -- cgit v1.2.3 From 15ec44611904be0dcc97b84c29fbf964e5e2b36f Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Fri, 19 Nov 2010 16:48:39 -0500 Subject: mmc: sdhci: 8-bit bus width changes We now: * check for a v3 controller before setting 8-bit bus width * offer a callback for platform code to switch to 8-bit mode, which allows non-v3 controllers to support it * rely on mmc->caps |= MMC_CAP_8_BIT_DATA; in platform code to specify that the board designers have indeed brought out all the pins for 8-bit to the slot. We were previously relying only on whether the *controller* supported 8-bit, which doesn't tell us anything about the pin configuration in the board design. This fixes the MMC card regression reported by Maxim Levitsky here: http://thread.gmane.org/gmane.linux.kernel.mmc/4336 by no longer assuming that 8-bit works by default. Signed-off-by: Philip Rakity Tested-by: Giuseppe Cavallaro Signed-off-by: Chris Ball --- arch/arm/plat-pxa/include/plat/sdhci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h index e49c5b6fc4e2..1ab332e37d7d 100644 --- a/arch/arm/plat-pxa/include/plat/sdhci.h +++ b/arch/arm/plat-pxa/include/plat/sdhci.h @@ -17,6 +17,9 @@ /* Require clock free running */ #define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0) +/* Board design supports 8-bit data on SD/SDIO BUS */ +#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2) + /* * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI * @max_speed: the maximum speed supported -- cgit v1.2.3 From c2d0879112825cddddd6c4f9b2645ff32acd6dc5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 22 Nov 2010 16:31:35 -0800 Subject: xen: clean up "extra" memory handling some more Make sure that extra_pages is added for all E820_RAM regions beyond mem_end - completely excluded regions as well as the remains of partially included regions. Also makes sure the extra region is not unnecessarily high, and simplifies the logic to decide which regions should be added. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/setup.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 38fdffaa71d3..b85dceef56f7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -182,24 +182,21 @@ char * __init xen_memory_setup(void) for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end = map[i].addr + map[i].size; - if (map[i].type == E820_RAM) { - if (map[i].addr < mem_end && end > mem_end) { - /* Truncate region to max_mem. */ - u64 delta = end - mem_end; + if (map[i].type == E820_RAM && end > mem_end) { + /* RAM off the end - may be partially included */ + u64 delta = min(map[i].size, end - mem_end); - map[i].size -= delta; - extra_pages += PFN_DOWN(delta); + map[i].size -= delta; + end -= delta; - end = mem_end; - } + extra_pages += PFN_DOWN(delta); } - if (end > xen_extra_mem_start) + if (map[i].size > 0 && end > xen_extra_mem_start) xen_extra_mem_start = end; - /* If region is non-RAM or below mem_end, add what remains */ - if ((map[i].type != E820_RAM || map[i].addr < mem_end) && - map[i].size > 0) + /* Add region if any remains */ + if (map[i].size > 0) e820_add_region(map[i].addr, map[i].size, map[i].type); } -- cgit v1.2.3 From bc15fde77fc5d9ec2eec6066a5ab554ea1266a0a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 22 Nov 2010 17:17:50 -0800 Subject: xen: use default_idle We just need the idle loop to drop into safe_halt, which default_idle() is perfectly capable of doing. There's no need to duplicate it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/setup.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b85dceef56f7..95fb68a8c20d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -250,20 +250,6 @@ char * __init xen_memory_setup(void) return "Xen"; } -static void xen_idle(void) -{ - local_irq_disable(); - - if (need_resched()) - local_irq_enable(); - else { - current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); - safe_halt(); - current_thread_info()->status |= TS_POLLING; - } -} - /* * Set the bit indicating "nosegneg" library variants should be used. * We only need to bother in pure 32-bit mode; compat 32-bit processes @@ -360,7 +346,11 @@ void __init xen_arch_setup(void) MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); - pm_idle = xen_idle; + /* Set up idle, making sure it calls safe_halt() pvop */ +#ifdef CONFIG_X86_32 + boot_cpu_data.hlt_works_ok = 1; +#endif + pm_idle = default_idle; fiddle_vdso(); } -- cgit v1.2.3 From 932c42b286e2c6479d1cbdee2927cb283b1c0c3b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Nov 2010 22:27:55 +0000 Subject: ARM: avoid annoying <4>'s in printk output Adding KERN_WARNING in the middle of strings now produces those tokens in the output, rather than accepting the level as was once the case. Fix this in the one reported case. There might be more... Signed-off-by: Russell King --- arch/arm/mm/ioremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 17e7b0b57e49..55c17a6fb22f 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -206,8 +206,8 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, */ if (pfn_valid(pfn)) { printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n" - KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" - KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n"); + "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" + "will fail in the next kernel release. Please fix your driver.\n"); WARN_ON(1); } -- cgit v1.2.3 From 28c22d7dc99486ef4186dde41d5260e75b3076f7 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 23 Nov 2010 11:39:23 +0100 Subject: ARM: 6490/1: MM: bugfix: initialize spinlock for init_mm.context init_mm used at kernel/sched.c:idle_task_exit() has spin_lock (init_mm.context.id_lock) that is not initialized when spin_lock/unlock is called at an ARM machine. Note that mm_struct.context.id_lock is usually initialized except for the instance of init_mm at linux/arch/arm/mm/context.c Not initializing this spinlock incurs "BUG: pinlock bad magic" warning when spinlock debug is enabled. We have observed such instances when testing PM in S5PC210 machines. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 68870c776671..b4ffe9d5b526 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -13,6 +13,10 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID #define ASID(mm) ((mm)->context.id & 255) + +/* init_mm.context.id_lock should be initialized. */ +#define INIT_MM_CONTEXT(name) \ + .context.id_lock = __SPIN_LOCK_UNLOCKED(name.context.id_lock), #else #define ASID(mm) (0) #endif -- cgit v1.2.3 From e8ee13a818db4954517cea7da6e7c15b9656eb00 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:22:58 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove fsidiv bogus disable Current FSIDIV clock framework had bogus disable. This patch remove it. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..13226323e4e0 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -453,10 +453,8 @@ static int fsidiv_enable(struct clk *clk) unsigned long value; value = __raw_readl(clk->mapping->base) >> 16; - if (value < 2) { - fsidiv_disable(clk); + if (value < 2) return -ENOENT; - } __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,11 +466,6 @@ static int fsidiv_set_rate(struct clk *clk, { int idx; - if (clk->parent->rate == rate) { - fsidiv_disable(clk); - return 0; - } - idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) return -ENOENT; -- cgit v1.2.3 From d4bc99b977e3a1dd10a84a01ebe59ac2ccebf0cd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 24 Nov 2010 02:44:06 +0000 Subject: ARM: mach-shmobile: ap4evb: FSI clock use proper process for HDMI Current AP4 FSI set_rate function used bogus clock process which didn't care enable/disable and clk->usecound. To solve this issue, this patch also modify FSI driver to call set_rate with enough options. This patch modify it. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 58 +++++++++++++++++++++++++++-------- arch/arm/mach-shmobile/clock-sh7372.c | 2 +- 2 files changed, 46 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d3260542b943..61c1068198ec 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -567,40 +567,72 @@ static struct platform_device *qhd_devices[] __initdata = { /* FSI */ #define IRQ_FSI evt2irq(0x1840) +static int __fsi_set_rate(struct clk *clk, long rate, int enable) +{ + int ret = 0; + + if (rate <= 0) + return ret; -static int fsi_set_rate(int is_porta, int rate) + if (enable) { + ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + if (0 == ret) + ret = clk_enable(clk); + } else { + clk_disable(clk); + } + + return ret; +} + +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; + long fsib_rate = 0; + long fdiv_rate = 0; + int ackmd_bpfmd; int ret; /* set_rate is not needed if port A */ if (is_porta) return 0; - fsib_clk = clk_get(NULL, "fsib_clk"); - if (IS_ERR(fsib_clk)) - return -EINVAL; - switch (rate) { case 44100: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; case 48000: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); - clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = 85428000; /* around 48kHz x 256 x 7 */ + fdiv_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; default: pr_err("unsupported rate in FSI2 port B\n"); - ret = -EINVAL; - break; + return -EINVAL; } + /* FSI B setting */ + fsib_clk = clk_get(dev, "ickb"); + if (IS_ERR(fsib_clk)) + return -EIO; + + ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); clk_put(fsib_clk); + if (ret < 0) + return ret; - return ret; + /* FSI DIV setting */ + ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + if (ret < 0) { + /* disable FSI B */ + if (enable) + __fsi_set_rate(fsib_clk, fsib_rate, 0); + return ret; + } + + return ackmd_bpfmd; } static struct sh_fsi_platform_info fsi_info = { diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 13226323e4e0..4191e2921127 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -471,7 +471,7 @@ static int fsidiv_set_rate(struct clk *clk, return -ENOENT; __raw_writel(idx << 16, clk->mapping->base); - return fsidiv_enable(clk); + return 0; } static struct clk_ops fsidiv_clk_ops = { -- cgit v1.2.3 From 22de4e1fe446794acaebdf19dcaff4256d659972 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:17 +0000 Subject: ARM: mach-shmobile: ap4evb: FSI clock use proper process for ak4642 Current AP4 FSI didn't use set_rate for ak4642, and used dummy rate when init. And FSI driver was modified to always call set_rate. The user which are using FSI set_rate is only AP4 now. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 96 ++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 61c1068198ec..e084b423146e 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -575,7 +575,7 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; if (enable) { - ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + ret = clk_set_rate(clk, rate); if (0 == ret) ret = clk_enable(clk); } else { @@ -585,7 +585,56 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; } -static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +static int __fsi_set_round_rate(struct clk *clk, long rate, int enable) +{ + return __fsi_set_rate(clk, clk_round_rate(clk, rate), enable); +} + +static int fsi_ak4642_set_rate(struct device *dev, int rate, int enable) +{ + struct clk *fsia_ick; + struct clk *fsiack; + int ret = -EIO; + + fsia_ick = clk_get(dev, "icka"); + if (IS_ERR(fsia_ick)) + return PTR_ERR(fsia_ick); + + /* + * FSIACK is connected to AK4642, + * and use external clock pin from it. + * it is parent of fsia_ick now. + */ + fsiack = clk_get_parent(fsia_ick); + if (!fsiack) + goto fsia_ick_out; + + /* + * we get 1/1 divided clock by setting same rate to fsiack and fsia_ick + * + ** FIXME ** + * Because the freq_table of external clk (fsiack) are all 0, + * the return value of clk_round_rate became 0. + * So, it use __fsi_set_rate here. + */ + ret = __fsi_set_rate(fsiack, rate, enable); + if (ret < 0) + goto fsiack_out; + + ret = __fsi_set_round_rate(fsia_ick, rate, enable); + if ((ret < 0) && enable) + __fsi_set_round_rate(fsiack, rate, 0); /* disable FSI ACK */ + +fsiack_out: + clk_put(fsiack); + +fsia_ick_out: + clk_put(fsia_ick); + + return 0; +} + +static int fsi_hdmi_set_rate(struct device *dev, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; @@ -594,10 +643,6 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) int ackmd_bpfmd; int ret; - /* set_rate is not needed if port A */ - if (is_porta) - return 0; - switch (rate) { case 44100: fsib_rate = rate * 256; @@ -618,23 +663,35 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) if (IS_ERR(fsib_clk)) return -EIO; - ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); + ret = __fsi_set_round_rate(fsib_clk, fsib_rate, enable); clk_put(fsib_clk); if (ret < 0) return ret; /* FSI DIV setting */ - ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + ret = __fsi_set_round_rate(fdiv_clk, fdiv_rate, enable); if (ret < 0) { /* disable FSI B */ if (enable) - __fsi_set_rate(fsib_clk, fsib_rate, 0); + __fsi_set_round_rate(fsib_clk, fsib_rate, 0); return ret; } return ackmd_bpfmd; } +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +{ + int ret; + + if (is_porta) + ret = fsi_ak4642_set_rate(dev, rate, enable); + else + ret = fsi_hdmi_set_rate(dev, rate, enable); + + return ret; +} + static struct sh_fsi_platform_info fsi_info = { .porta_flags = SH_FSI_BRS_INV | SH_FSI_OUT_SLAVE_MODE | @@ -928,23 +985,11 @@ out: device_initcall(hdmi_init_pm_clock); -#define FSIACK_DUMMY_RATE 48000 static int __init fsi_init_pm_clock(void) { struct clk *fsia_ick; int ret; - /* - * FSIACK is connected to AK4642, - * and the rate is depend on playing sound rate. - * So, set dummy rate (= 48k) here - */ - ret = clk_set_rate(&sh7372_fsiack_clk, FSIACK_DUMMY_RATE); - if (ret < 0) { - pr_err("Cannot set FSIACK dummy rate: %d\n", ret); - return ret; - } - fsia_ick = clk_get(&fsi_device.dev, "icka"); if (IS_ERR(fsia_ick)) { ret = PTR_ERR(fsia_ick); @@ -953,16 +998,9 @@ static int __init fsi_init_pm_clock(void) } ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk); - if (ret < 0) { - pr_err("Cannot set FSI-A parent: %d\n", ret); - goto out; - } - - ret = clk_set_rate(fsia_ick, FSIACK_DUMMY_RATE); if (ret < 0) - pr_err("Cannot set FSI-A rate: %d\n", ret); + pr_err("Cannot set FSI-A parent: %d\n", ret); -out: clk_put(fsia_ick); return ret; -- cgit v1.2.3 From a57b1a9bdfb61e8d7dc7acc5e2d8bc04c549e668 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:26 +0000 Subject: ARM: mach-shmobile: clock-sh7372: modify error code Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 4191e2921127..41663e0ca104 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -454,7 +454,7 @@ static int fsidiv_enable(struct clk *clk) value = __raw_readl(clk->mapping->base) >> 16; if (value < 2) - return -ENOENT; + return -EIO; __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,7 +468,7 @@ static int fsidiv_set_rate(struct clk *clk, idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) - return -ENOENT; + return -EINVAL; __raw_writel(idx << 16, clk->mapping->base); return 0; -- cgit v1.2.3 From b16a2892b9852839307894cc429b7a7b145138a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:32 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove unnecessary fsi clocks Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 41663e0ca104..cbeca2849918 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -602,8 +602,6 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]), CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]), CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]), - CLKDEV_CON_ID("fsia_clk", &div6_reparent_clks[DIV6_FSIA]), - CLKDEV_CON_ID("fsib_clk", &div6_reparent_clks[DIV6_FSIB]), CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]), CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]), CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]), -- cgit v1.2.3 From 421b446abeec55bed1251fab80cb5c12be58b773 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:52 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove bogus pllc2 clock toggling. The PLLC2 clock was utilizing the same sort of enable/disable without regard to usecount approach that the FSIDIV clock was when being used as a PLL pass-through. This forces the enable/disable through the clock framework, which now prevents the clock from being ripped out or modified underneath users that have an existing handle on it. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 5 +++++ arch/arm/mach-shmobile/clock-sh7372.c | 12 ++---------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index e084b423146e..d440e5f456ad 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -969,6 +969,11 @@ static int __init hdmi_init_pm_clock(void) goto out; } + ret = clk_enable(&sh7372_pllc2_clk); + if (ret < 0) { + pr_err("Cannot enable pllc2 clock\n"); + goto out; + } pr_debug("PLLC2 set frequency %lu\n", rate); ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk); diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index cbeca2849918..e18a1241a95e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -230,21 +230,13 @@ static int pllc2_set_rate(struct clk *clk, if (idx < 0) return idx; - if (rate == clk->parent->rate) { - pllc2_disable(clk); - return 0; - } + if (rate == clk->parent->rate) + return -EINVAL; value = __raw_readl(PLLC2CR) & ~(0x3f << 24); - if (value & 0x80000000) - pllc2_disable(clk); - __raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR); - if (value & 0x80000000) - return pllc2_enable(clk); - return 0; } -- cgit v1.2.3 From 5b5c1af104ab5adec1be9dcb4c787492d83d8d83 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 24 Nov 2010 12:09:41 +0000 Subject: xen: x86/32: perform initial startup on initial_page_table Only make swapper_pg_dir readonly and pinned when generic x86 architecture code (which also starts on initial_page_table) switches to it. This helps ensure that the generic setup paths work on Xen unmodified. In particular clone_pgd_range writes directly to the destination pgd entries and is used to initialise swapper_pg_dir so we need to ensure that it remains writeable until the last possible moment during bring up. This is complicated slightly by the need to avoid sharing kernel PMD entries when running under Xen, therefore the Xen implementation must make a copy of the kernel PMD (which is otherwise referred to by both intial_page_table and swapper_pg_dir) before switching to swapper_pg_dir. Signed-off-by: Ian Campbell Tested-by: Konrad Rzeszutek Wilk Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 2 -- arch/x86/xen/mmu.c | 69 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..ff82909801b6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1191,8 +1191,6 @@ asmlinkage void __init xen_start_kernel(void) /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); - init_mm.pgd = pgd; - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..c9cf23e17440 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2119,44 +2119,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, return pgd; } #else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); + +static __init void xen_write_cr3_init(unsigned long cr3) +{ + unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); + + BUG_ON(read_cr3() != __pa(initial_page_table)); + BUG_ON(cr3 != __pa(swapper_pg_dir)); + + /* + * We are switching to swapper_pg_dir for the first time (from + * initial_page_table) and therefore need to mark that page + * read-only and then pin it. + * + * Xen disallows sharing of kernel PMDs for PAE + * guests. Therefore we must copy the kernel PMD from + * initial_page_table into a new kernel PMD to be used in + * swapper_pg_dir. + */ + swapper_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + memcpy(swapper_kernel_pmd, initial_kernel_pmd, + sizeof(pmd_t) * PTRS_PER_PMD); + swapper_pg_dir[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); + set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); + + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + xen_write_cr3(cr3); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(initial_page_table))); + set_page_prot(initial_page_table, PAGE_KERNEL); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL); + + pv_mmu_ops.write_cr3 = &xen_write_cr3; +} __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; - level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + initial_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + xen_start_info->nr_pt_frames * PAGE_SIZE + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - xen_map_identity_early(level2_kernel_pgt, max_pfn); + xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + initial_page_table[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); + set_page_prot(initial_page_table, PAGE_KERNEL_RO); set_page_prot(empty_zero_page, PAGE_KERNEL_RO); pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, + PFN_DOWN(__pa(initial_page_table))); + xen_write_cr3(__pa(initial_page_table)); memblock_x86_reserve_range(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + xen_start_info->nr_pt_frames * PAGE_SIZE), "XEN PAGETABLES"); - return swapper_pg_dir; + return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2290,7 +2329,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, +#ifdef CONFIG_X86_32 + .write_cr3 = xen_write_cr3_init, +#else .write_cr3 = xen_write_cr3, +#endif .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, -- cgit v1.2.3 From e6d4a76dbf2ff27314e09291dfb9e4afcb9ecd60 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sat, 20 Nov 2010 20:05:46 +0800 Subject: xen: remove duplicated #include Remove duplicated #include('s) in arch/x86/xen/setup.c Signed-off-by: Huang Weiyi Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..d392486179e7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include "xen-ops.h" -- cgit v1.2.3 From e5a06939736277c54a68ae275433db55b99d187c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 17:00:37 -0400 Subject: drivers/net/tile/: on-chip network drivers for the tile architecture This change adds the first network driver for the tile architecture, supporting the on-chip XGBE and GBE shims. The infrastructure is present for the TILE-Gx networking drivers (another three source files in the new directory) but for now the the actual tilegx sources are waiting on releasing hardware to initial customers. Note that arch/tile/include/hv/* are "upstream" headers from the Tilera hypervisor and will probably benefit less from LKML review. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cacheflush.h | 52 + arch/tile/include/asm/processor.h | 10 + arch/tile/include/hv/drv_xgbe_impl.h | 300 ++++ arch/tile/include/hv/drv_xgbe_intf.h | 615 +++++++ arch/tile/include/hv/netio_errors.h | 122 ++ arch/tile/include/hv/netio_intf.h | 2975 ++++++++++++++++++++++++++++++++++ arch/tile/mm/init.c | 8 +- 7 files changed, 4080 insertions(+), 2 deletions(-) create mode 100644 arch/tile/include/hv/drv_xgbe_impl.h create mode 100644 arch/tile/include/hv/drv_xgbe_intf.h create mode 100644 arch/tile/include/hv/netio_errors.h create mode 100644 arch/tile/include/hv/netio_intf.h (limited to 'arch') diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index c5741da4eeac..14a3f8556ace 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) mb_incoherent(); } +/* + * Flush & invalidate a VA range that is homed remotely on a single core, + * waiting until the memory controller holds the flushed values. + */ +static inline void finv_buffer_remote(void *buffer, size_t size) +{ + char *p; + int i; + + /* + * Flush and invalidate the buffer out of the local L1/L2 + * and request the home cache to flush and invalidate as well. + */ + __finv_buffer(buffer, size); + + /* + * Wait for the home cache to acknowledge that it has processed + * all the flush-and-invalidate requests. This does not mean + * that the flushed data has reached the memory controller yet, + * but it does mean the home cache is processing the flushes. + */ + __insn_mf(); + + /* + * Issue a load to the last cache line, which can't complete + * until all the previously-issued flushes to the same memory + * controller have also completed. If we weren't striping + * memory, that one load would be sufficient, but since we may + * be, we also need to back up to the last load issued to + * another memory controller, which would be the point where + * we crossed an 8KB boundary (the granularity of striping + * across memory controllers). Keep backing up and doing this + * until we are before the beginning of the buffer, or have + * hit all the controllers. + */ + for (i = 0, p = (char *)buffer + size - 1; + i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; + ++i) { + const unsigned long STRIPE_WIDTH = 8192; + + /* Force a load instruction to issue. */ + *(volatile char *)p; + + /* Jump to end of previous stripe. */ + p -= STRIPE_WIDTH; + p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); + } + + /* Wait for the loads (and thus flushes) to have completed. */ + __insn_mf(); +} + #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 1747ff3946b2..a9e7c8760334 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -292,8 +292,18 @@ extern int kstack_hash; /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; +/* Support standard Linux prefetching. */ +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch(x) #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() +/* Bring a value into the L1D, faulting the TLB if necessary. */ +#ifdef __tilegx__ +#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) +#else +#define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) +#endif + #else /* __ASSEMBLY__ */ /* Do some slow action (e.g. read a slow SPR). */ diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000000..3a73b2b44913 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h @@ -0,0 +1,300 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drivers/xgbe/impl.h + * Implementation details for the NetIO library. + */ + +#ifndef __DRV_XGBE_IMPL_H__ +#define __DRV_XGBE_IMPL_H__ + +#include +#include +#include + + +/** How many groups we have (log2). */ +#define LOG2_NUM_GROUPS (12) +/** How many groups we have. */ +#define NUM_GROUPS (1 << LOG2_NUM_GROUPS) + +/** Number of output requests we'll buffer per tile. */ +#define EPP_REQS_PER_TILE (32) + +/** Words used in an eDMA command without checksum acceleration. */ +#define EDMA_WDS_NO_CSUM 8 +/** Words used in an eDMA command with checksum acceleration. */ +#define EDMA_WDS_CSUM 10 +/** Total available words in the eDMA command FIFO. */ +#define EDMA_WDS_TOTAL 128 + + +/* + * FIXME: These definitions are internal and should have underscores! + * NOTE: The actual numeric values here are intentional and allow us to + * optimize the concept "if small ... else if large ... else ...", by + * checking for the low bit being set, and then for non-zero. + * These are used as array indices, so they must have the values (0, 1, 2) + * in some order. + */ +#define SIZE_SMALL (1) /**< Small packet queue. */ +#define SIZE_LARGE (2) /**< Large packet queue. */ +#define SIZE_JUMBO (0) /**< Jumbo packet queue. */ + +/** The number of "SIZE_xxx" values. */ +#define NETIO_NUM_SIZES 3 + + +/* + * Default numbers of packets for IPP drivers. These values are chosen + * such that CIPP1 will not overflow its L2 cache. + */ + +/** The default number of small packets. */ +#define NETIO_DEFAULT_SMALL_PACKETS 2750 +/** The default number of large packets. */ +#define NETIO_DEFAULT_LARGE_PACKETS 2500 +/** The default number of jumbo packets. */ +#define NETIO_DEFAULT_JUMBO_PACKETS 250 + + +/** Log2 of the size of a memory arena. */ +#define NETIO_ARENA_SHIFT 24 /* 16 MB */ +/** Size of a memory arena. */ +#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) + + +/** A queue of packets. + * + * This structure partially defines a queue of packets waiting to be + * processed. The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other part of the queue state, the read offset, is + * kept in user space, not in hypervisor space, so it is in a separate data + * structure. + * + * The read offset (__packet_receive_read in the user part of the queue + * structure) points to the next packet to be read. When the read offset is + * equal to the write offset, the queue is empty; therefore the queue must + * contain one more slot than the required maximum queue size. + * + * Here's an example of all 3 state variables and what they mean. All + * pointers move left to right. + * + * @code + * I I V V V V I I I I + * 0 1 2 3 4 5 6 7 8 9 10 + * ^ ^ ^ ^ + * | | | + * | | __last_packet_plus_one + * | __buffer_write + * __packet_receive_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one + * = 10). The read pointer is at 2, and the write pointer is at 6; thus, + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining + * slots are invalid (do not contain a packet). + */ +typedef struct { + /** Byte offset of the next notify packet to be written: zero for the first + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the + * queue, etc. */ + volatile uint32_t __packet_write; + + /** Offset of the packet after the last valid packet (i.e., when any + * pointer is incremented to this value, it wraps back to zero). */ + uint32_t __last_packet_plus_one; +} +__netio_packet_queue_t; + + +/** A queue of buffers. + * + * This structure partially defines a queue of empty buffers which have been + * obtained via requests to the IPP. (The elements of the queue are packet + * handles, which are transformed into a full netio_pkt_t when the buffer is + * retrieved.) The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other parts of the queue state, the read offset and + * requested write offset, are kept in user space, not in hypervisor space, so + * they are in a separate data structure. + * + * The read offset (__buffer_read in the user part of the queue structure) + * points to the next buffer to be read. When the read offset is equal to the + * write offset, the queue is empty; therefore the queue must contain one more + * slot than the required maximum queue size. + * + * The requested write offset (__buffer_requested_write in the user part of + * the queue structure) points to the slot which will hold the next buffer we + * request from the IPP, once we get around to sending such a request. When + * the requested write offset is equal to the write offset, no requests for + * new buffers are outstanding; when the requested write offset is one greater + * than the read offset, no more requests may be sent. + * + * Note that, unlike the packet_queue, the buffer_queue places incoming + * buffers at decreasing addresses. This makes the check for "is it time to + * wrap the buffer pointer" cheaper in the assembly code which receives new + * buffers, and means that the value which defines the queue size, + * __last_buffer, is different than in the packet queue. Also, the offset + * used in the packet_queue is already scaled by the size of a packet; here we + * use unscaled slot indices for the offsets. (These differences are + * historical, and in the future it's possible that the packet_queue will look + * more like this queue.) + * + * @code + * Here's an example of all 4 state variables and what they mean. Remember: + * all pointers move right to left. + * + * V V V I I R R V V V + * 0 1 2 3 4 5 6 7 8 9 + * ^ ^ ^ ^ + * | | | | + * | | | __last_buffer + * | | __buffer_write + * | __buffer_requested_write + * __buffer_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). + * The read pointer is at 2, and the write pointer is at 6; thus, there are + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write + * pointer is at 4; thus, requests have been made to the IPP for buffers which + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining + * slots are invalid (do not contain a buffer). + */ +typedef struct +{ + /** Ordinal number of the next buffer to be written: 0 for the first slot in + * the queue, 1 for the second slot in the queue, etc. */ + volatile uint32_t __buffer_write; + + /** Ordinal number of the last buffer (i.e., when any pointer is decremented + * below zero, it is reloaded with this value). */ + uint32_t __last_buffer; +} +__netio_buffer_queue_t; + + +/** + * An object for providing Ethernet packets to a process. + */ +typedef struct __netio_queue_impl_t +{ + /** The queue of packets waiting to be received. */ + __netio_packet_queue_t __packet_receive_queue; + /** The intr bit mask that IDs this device. */ + unsigned int __intr_id; + /** Offset to queues of empty buffers, one per size. */ + uint32_t __buffer_queue[NETIO_NUM_SIZES]; + /** The address of the first EPP tile, or -1 if no EPP. */ + /* ISSUE: Actually this is always "0" or "~0". */ + uint32_t __epp_location; + /** The queue ID that this queue represents. */ + unsigned int __queue_id; + /** Number of acknowledgements received. */ + volatile uint32_t __acks_received; + /** Last completion number received for packet_sendv. */ + volatile uint32_t __last_completion_rcv; + /** Number of packets allowed to be outstanding. */ + uint32_t __max_outstanding; + /** First VA available for packets. */ + void* __va_0; + /** First VA in second range available for packets. */ + void* __va_1; + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ + uint32_t __padding[3]; + /** The packets themselves. */ + netio_pkt_t __packets[0]; +} +netio_queue_impl_t; + + +/** + * An object for managing the user end of a NetIO queue. + */ +typedef struct __netio_queue_user_impl_t +{ + /** The next incoming packet to be read. */ + uint32_t __packet_receive_read; + /** The next empty buffers to be read, one index per size. */ + uint8_t __buffer_read[NETIO_NUM_SIZES]; + /** Where the empty buffer we next request from the IPP will go, one index + * per size. */ + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; + /** PCIe interface flag. */ + uint8_t __pcie; + /** Number of packets left to be received before we send a credit update. */ + uint32_t __receive_credit_remaining; + /** Value placed in __receive_credit_remaining when it reaches zero. */ + uint32_t __receive_credit_interval; + /** First fast I/O routine index. */ + uint32_t __fastio_index; + /** Number of acknowledgements expected. */ + uint32_t __acks_outstanding; + /** Last completion number requested. */ + uint32_t __last_completion_req; + /** File descriptor for driver. */ + int __fd; +} +netio_queue_user_impl_t; + + +#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ +#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ + + +/** Internal structure used to convey packet send information to the + * hypervisor. FIXME: Actually, it's not used for that anymore, but + * netio_packet_send() still uses it internally. + */ +typedef struct +{ + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ + uint16_t transfer_size; /**< Size of packet */ + uint32_t va; /**< VA of start of packet */ + __netio_pkt_handle_t handle; /**< Packet handle */ + uint32_t csum0; /**< First checksum word */ + uint32_t csum1; /**< Second checksum word */ +} +__netio_send_cmd_t; + + +/** Flags used in two contexts: + * - As the "flags" member in the __netio_send_cmd_t, above; used only + * for netio_pkt_send_{prepare,commit}. + * - As part of the flags passed to the various send packet fast I/O calls. + */ + +/** Need acknowledgement on this packet. Note that some code in the + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ +#define __NETIO_SEND_FLG_ACK 0x1 + +/** Do checksum on this packet. (Only used with the __netio_send_cmd_t; + * normal packet sends use a special fast I/O index to denote checksumming, + * and multi-segment sends test the checksum descriptor.) */ +#define __NETIO_SEND_FLG_CSUM 0x2 + +/** Get a completion on this packet. Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_COMPLETION 0x4 + +/** Position of the number-of-extra-segments value in the flags word. + Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_XSEG_SHIFT 3 + +/** Width of the number-of-extra-segments value in the flags word. */ +#define __NETIO_SEND_FLG_XSEG_WIDTH 2 + +#endif /* __DRV_XGBE_IMPL_H__ */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000000..146e47d5334b --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -0,0 +1,615 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_xgbe_intf.h + * Interface to the hypervisor XGBE driver. + */ + +#ifndef __DRV_XGBE_INTF_H__ +#define __DRV_XGBE_INTF_H__ + +/** + * An object for forwarding VAs and PAs to the hypervisor. + * @ingroup types + * + * This allows the supervisor to specify a number of areas of memory to + * store packet buffers. + */ +typedef struct +{ + /** The physical address of the memory. */ + HV_PhysAddr pa; + /** Page table entry for the memory. This is only used to derive the + * memory's caching mode; the PA bits are ignored. */ + HV_PTE pte; + /** The virtual address of the memory. */ + HV_VirtAddr va; + /** Size (in bytes) of the memory area. */ + int size; + +} +netio_ipp_address_t; + +/** The various pread/pwrite offsets into the hypervisor-level driver. + * @ingroup types + */ +typedef enum +{ + /** Inform the Linux driver of the address of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA address. */ + NETIO_FIXED_ADDR = 0x5000000000000000ULL, + + /** Inform the Linux driver of the size of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA size. */ + NETIO_FIXED_SIZE = 0x5100000000000000ULL, + + /** Register current tile with IPP. Write then read: write, takes a + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, + + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, + + /** Start packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, + + /** Stop packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, + + /** Configure group (typically we group on VLAN). Write-only: takes an + * array of netio_group_t's, low 24 bits of the offset is the base group + * number times the size of a netio_group_t. */ + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, + + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low + * 24 bits of the offset is the base bucket number times the size of a + * netio_bucket_t. */ + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, + + /** Get/set a parameter. Read or write: read or write data is the parameter + * value, low 32 bits of the offset is a __netio_getset_offset_t. */ + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, + + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, + + /** Configure hijack IP address. Packets with this IPv4 dest address + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address + * in some standard form. FIXME: Define the form! */ + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, + + /** + * Offsets beyond this point are reserved for the supervisor (although that + * enforcement must be done by the supervisor driver itself). + */ + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, + + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, + + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, + + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux + * userspace code due to limitations in the pread/pwrite syscalls. */ + + /** Drain LIPP buffers. */ + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, + + /** Supply a netio_ipp_address_t to be used as shared memory for the + * LEPP command queue. */ + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, + + /* 0xFC... is currently unused. */ + + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, + + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, + + /** Supply packet arena. Write-only, takes an array of + * netio_ipp_address_t values. */ + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, +} netio_hv_offset_t; + +/** Extract the base offset from an offset */ +#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) +/** Extract the local offset from an offset */ +#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) + + +/** + * Get/set offset. + */ +typedef union +{ + struct + { + uint64_t addr:48; /**< Class-specific address */ + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ + } + bits; /**< Bitfields */ + uint64_t word; /**< Aggregated value to use as the offset */ +} +__netio_getset_offset_t; + +/** + * Fast I/O index offsets (must be contiguous). + */ +typedef enum +{ + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ +} netio_fastio_index_t; + +/** 3-word return type for Fast I/O call. */ +typedef struct +{ + int err; /**< Error code. */ + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ + uint32_t val1; /**< Value. Meaning depends upon the specific call. */ +} netio_fastio_rv3_t; + +/** 0-argument fast I/O call */ +int __netio_fastio0(uint32_t fastio_index); +/** 1-argument fast I/O call */ +int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); +/** 3-argument fast I/O call, 2-word return value */ +netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, + uint32_t arg1, uint32_t arg2); +/** 4-argument fast I/O call */ +int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3); +/** 6-argument fast I/O call */ +int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); +/** 9-argument fast I/O call */ +int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, + uint32_t arg6, uint32_t arg7, uint32_t arg8); + +/** Allocate an empty packet. + * @param fastio_index Fast I/O index. + * @param size Size of the packet to allocate. + */ +#define __netio_fastio_allocate(fastio_index, size) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) + +/** Free a buffer. + * @param fastio_index Fast I/O index. + * @param handle Handle for the packet to free. + */ +#define __netio_fastio_free_buffer(fastio_index, handle) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) + +/** Increment our receive credits. + * @param fastio_index Fast I/O index. + * @param credits Number of credits to add. + */ +#define __netio_fastio_return_credits(fastio_index, credits) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) + +/** Send packet, no checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + */ +#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ + size, va, handle) + +/** Send packet, calculate checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + * @param csum0 Shim checksum header. + * @param csum1 Checksum seed. + */ +#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ + csum0, csum1) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ + size, va, handle, csum0, csum1) + + +/** Format for the "csum0" argument to the __netio_fastio_send routines + * and LEPP. Note that this is currently exactly identical to the + * ShimProtocolOffloadHeader. + */ +typedef union +{ + struct + { + unsigned int start_byte:7; /**< The first byte to be checksummed */ + unsigned int count:14; /**< Number of bytes to be checksummed. */ + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ + unsigned int reserved:4; /**< Reserved. */ + } bits; /**< Decomposed method of access. */ + unsigned int word; /**< To send out the IDN. */ +} __netio_checksum_header_t; + + +/** Sendv packet with 1 or 2 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment, if 2 segments. + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment, if 2 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Send packet on PCIe interface. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. + * @param va_F Virtual address of the packet buffer. + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. + * @param len_F_L Length of the packet buffer in low 16 bits. + */ +#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Sendv packet with 3 or 4 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment (third segment if 3 segments, + * fourth segment if 4 segments). + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment in high 16 bits. + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent + * second when there are three segments, and third if there are four. + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent + * second when there are four segments. + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle + * 1 segment, if 4 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) + +/** Send vector of packets. + * @param fastio_index Fast I/O index. + * @param seqno Number of packets transmitted so far on this interface; + * used to decide which packets should be acknowledged. + * @param nentries Number of entries in vector. + * @param va Virtual address of start of vector entry array. + * @return 3-word netio_fastio_rv3_t structure. The structure's err member + * is an error code, or zero if no error. The val0 member is the + * updated value of seqno; it has been incremented by 1 for each + * packet sent. That increment may be less than nentries if an + * error occured, or if some of the entries in the vector contain + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the + * updated value of nentries; it has been decremented by 1 for each + * vector entry processed. Again, that decrement may be less than + * nentries (leaving the returned value positive) if an error + * occurred. + */ +#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ + nentries, va) + + +/** An egress DMA command for LEPP. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 0, to distinguish it from + * lepp_tso_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 3; + + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint8_t hash_for_home : 1; + + /** Should we compute a checksum? */ + uint8_t compute_checksum : 1; + + /** Is this the final buffer for this packet? + * + * A single packet can be split over several input buffers (a "gather" + * operation). This flag indicates that this is the last buffer + * in a packet. + */ + uint8_t end_of_packet : 1; + + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ + uint8_t send_completion : 1; + + /** High bits of Client Physical Address of the start of the buffer + * to be egressed. + * + * NOTE: Only 6 bits are actually needed here, as CPAs are + * currently 38 bits. So two bits could be scavenged from this. + */ + uint8_t cpa_hi; + + /** The number of bytes to be egressed. */ + uint16_t length; + + /** Low 32 bits of Client Physical Address of the start of the buffer + * to be egressed. + */ + uint32_t cpa_lo; + + /** Checksum information (only used if 'compute_checksum'). */ + __netio_checksum_header_t checksum_data; + +} lepp_cmd_t; + + +/** A chunk of physical memory for a TSO egress. */ +typedef struct +{ + /** The low bits of the CPA. */ + uint32_t cpa_lo; + /** The high bits of the CPA. */ + uint16_t cpa_hi : 15; + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint16_t hash_for_home : 1; + /** The length in bytes. */ + uint16_t length; +} lepp_frag_t; + + +/** An LEPP command that handles TSO. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 1, to distinguish it from + * lepp_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 7; + + /** Size of the header[] array in bytes. It must be in the range + * [40, 127], which are the smallest header for a TCP packet over + * Ethernet and the maximum possible prepend size supported by + * hardware, respectively. Note that the array storage must be + * padded out to a multiple of four bytes so that the following + * LEPP command is aligned properly. + */ + uint8_t header_size; + + /** Byte offset of the IP header in header[]. */ + uint8_t ip_offset; + + /** Byte offset of the TCP header in header[]. */ + uint8_t tcp_offset; + + /** The number of bytes to use for the payload of each packet, + * except of course the last one, which may not have enough bytes. + * This means that each Ethernet packet except the last will have a + * size of header_size + payload_size. + */ + uint16_t payload_size; + + /** The length of the 'frags' array that follows this struct. */ + uint16_t num_frags; + + /** The actual frags. */ + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; + + /* + * The packet header template logically follows frags[], + * but you can't declare that in C. + * + * uint32_t header[header_size_in_words_rounded_up]; + */ + +} lepp_tso_cmd_t; + + +/** An LEPP completion ring entry. */ +typedef void* lepp_comp_t; + + +/** Maximum number of frags for one TSO command. This is adapted from + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for + * our page size of exactly 65536. We add one for a "body" fragment. + */ +#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) + +/** Total number of bytes needed for an lepp_tso_cmd_t. */ +#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ + (sizeof(lepp_tso_cmd_t) + \ + (num_frags) * sizeof(lepp_frag_t) + \ + (((header_size) + 3) & -4)) + +/** The size of the lepp "cmd" queue. */ +#define LEPP_CMD_QUEUE_BYTES \ + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) + +/** The largest possible command that can go in lepp_queue_t::cmds[]. */ +#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) + +/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). + */ +#define LEPP_CMD_LIMIT \ + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) + +/** The maximum number of completions in an LEPP queue. */ +#define LEPP_COMP_QUEUE_SIZE \ + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) + +/** Increment an index modulo the queue size. */ +#define LEPP_QINC(var) \ + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) + +/** A queue used to convey egress commands from the client to LEPP. */ +typedef struct +{ + /** Index of first completion not yet processed by user code. + * If this is equal to comp_busy, there are no such completions. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_head; + + /** Index of first completion record not yet completed. + * If this is equal to comp_tail, there are no such completions. + * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever + * a command with the 'completion' bit set is finished. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int comp_busy; + + /** Index of the first empty slot in the completion ring. + * Entries from this up to but not including comp_head (in ring order) + * can be filled in with completion data. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_tail; + + /** Byte index of first command enqueued for LEPP but not yet processed. + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: LEPP advances this counter as soon as it no longer needs + * the cmds[] storage for this entry, but the transfer is not actually + * complete (i.e. the buffer pointed to by the command is no longer + * needed) until comp_busy advances. + * + * If this is equal to cmd_tail, the ring is empty. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int cmd_head; + + /** Byte index of first empty slot in the command ring. This field can + * be incremented up to but not equal to cmd_head (because that would + * mean the ring is empty). + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: This is read/written by the user, only read by LEPP. + */ + volatile unsigned int cmd_tail; + + /** A ring of variable-sized egress DMA commands. + * + * NOTE: Only written by the user, only read by LEPP. + */ + char cmds[LEPP_CMD_QUEUE_BYTES] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); + + /** A ring of user completion data. + * NOTE: Only read/written by the user. + */ + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); +} lepp_queue_t; + + +/** An internal helper function for determining the number of entries + * available in a ring buffer, given that there is one sentinel. + */ +static inline unsigned int +_lepp_num_free_slots(unsigned int head, unsigned int tail) +{ + /* + * One entry is reserved for use as a sentinel, to distinguish + * "empty" from "full". So we compute + * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. + */ + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); +} + + +/** Returns how many new comp entries can be enqueued. */ +static inline unsigned int +lepp_num_free_comp_slots(const lepp_queue_t* q) +{ + return _lepp_num_free_slots(q->comp_head, q->comp_tail); +} + +static inline int +lepp_qsub(int v1, int v2) +{ + int delta = v1 - v2; + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); +} + + +/** FIXME: Check this from linux, via a new "pwrite()" call. */ +#define LIPP_VERSION 1 + + +/** We use exactly two bytes of alignment padding. */ +#define LIPP_PACKET_PADDING 2 + +/** The minimum size of a "small" buffer (including the padding). */ +#define LIPP_SMALL_PACKET_SIZE 128 + +/* + * NOTE: The following two values should total to less than around + * 13582, to keep the total size used for "lipp_state_t" below 64K. + */ + +/** The maximum number of "small" buffers. + * This is enough for 53 network cpus with 128 credits. Note that + * if these are exhausted, we will fall back to using large buffers. + */ +#define LIPP_SMALL_BUFFERS 6785 + +/** The maximum number of "large" buffers. + * This is enough for 53 network cpus with 128 credits. + */ +#define LIPP_LARGE_BUFFERS 6785 + +#endif /* __DRV_XGBE_INTF_H__ */ diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000000..e1591bff61b5 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Error codes returned from NetIO routines. + */ + +#ifndef __NETIO_ERRORS_H__ +#define __NETIO_ERRORS_H__ + +/** + * @addtogroup error + * + * @brief The error codes returned by NetIO functions. + * + * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and + * a negative value if an error occurs. + * + * In cases where a NetIO function failed due to a error reported by + * system libraries, the error code will be the negation of the + * system errno at the time of failure. The @ref netio_strerror() + * function will deliver error strings for both NetIO and system error + * codes. + * + * @{ + */ + +/** The set of all NetIO errors. */ +typedef enum +{ + /** Operation successfully completed. */ + NETIO_NO_ERROR = 0, + + /** A packet was successfully retrieved from an input queue. */ + NETIO_PKT = 0, + + /** Largest NetIO error number. */ + NETIO_ERR_MAX = -701, + + /** The tile is not registered with the IPP. */ + NETIO_NOT_REGISTERED = -701, + + /** No packet was available to retrieve from the input queue. */ + NETIO_NOPKT = -702, + + /** The requested function is not implemented. */ + NETIO_NOT_IMPLEMENTED = -703, + + /** On a registration operation, the target queue already has the maximum + * number of tiles registered for it, and no more may be added. On a + * packet send operation, the output queue is full and nothing more can + * be queued until some of the queued packets are actually transmitted. */ + NETIO_QUEUE_FULL = -704, + + /** The calling process or thread is not bound to exactly one CPU. */ + NETIO_BAD_AFFINITY = -705, + + /** Cannot allocate memory on requested controllers. */ + NETIO_CANNOT_HOME = -706, + + /** On a registration operation, the IPP specified is not configured + * to support the options requested; for instance, the application + * wants a specific type of tagged headers which the configured IPP + * doesn't support. Or, the supplied configuration information is + * not self-consistent, or is out of range; for instance, specifying + * both NETIO_RECV and NETIO_NO_RECV, or asking for more than + * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket + * configure operation, the number of items, or the base item, was + * out of range. + */ + NETIO_BAD_CONFIG = -707, + + /** Too many tiles have registered to transmit packets. */ + NETIO_TOOMANY_XMIT = -708, + + /** Packet transmission was attempted on a queue which was registered + with transmit disabled. */ + NETIO_UNREG_XMIT = -709, + + /** This tile is already registered with the IPP. */ + NETIO_ALREADY_REGISTERED = -710, + + /** The Ethernet link is down. The application should try again later. */ + NETIO_LINK_DOWN = -711, + + /** An invalid memory buffer has been specified. This may be an unmapped + * virtual address, or one which does not meet alignment requirements. + * For netio_input_register(), this error may be returned when multiple + * processes specify different memory regions to be used for NetIO + * buffers. That can happen if these processes specify explicit memory + * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() + * has not been called by a common ancestor of the processes. + */ + NETIO_FAULT = -712, + + /** Cannot combine user-managed shared memory and cache coherence. */ + NETIO_BAD_CACHE_CONFIG = -713, + + /** Smallest NetIO error number. */ + NETIO_ERR_MIN = -713, + +#ifndef __DOXYGEN__ + /** Used internally to mean that no response is needed; never returned to + * an application. */ + NETIO_NO_RESPONSE = 1 +#endif +} netio_error_t; + +/** @} */ + +#endif /* __NETIO_ERRORS_H__ */ diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000000..8d20972aba2c --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h @@ -0,0 +1,2975 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * NetIO interface structures and macros. + */ + +#ifndef __NETIO_INTF_H__ +#define __NETIO_INTF_H__ + +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) +#include +#define netio_assert assert /**< Enable assertions from macros */ +#else +#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ +#endif + +/* + * If none of these symbols are defined, we're building libnetio in an + * environment where we have pthreads, so we'll enable locking. + */ +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ + !defined(__NEWLIB__) +#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ + +/* + * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on + * per-packet NetIO operations. We still do pthread locking on things + * like netio_input_register, though. This is used for building + * libnetio_unlocked. + */ +#ifndef NETIO_UNLOCKED + +/* Avoid PLT overhead by using our own inlined per-cpu lock. */ +#include +typedef int _netio_percpu_mutex_t; + +static __inline int +_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +static __inline int +_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) +{ + while (__builtin_expect(__insn_tns(lock), 0)) + sched_yield(); + return 0; +} + +static __inline int +_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +#else /* NETIO_UNLOCKED */ + +/* Don't do any locking for per-packet NetIO operations. */ +typedef int _netio_percpu_mutex_t; +#define _netio_percpu_mutex_init(L) +#define _netio_percpu_mutex_lock(L) +#define _netio_percpu_mutex_unlock(L) + +#endif /* NETIO_UNLOCKED */ +#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ + +/** How many tiles can register for a given queue. + * @ingroup setup */ +#define NETIO_MAX_TILES_PER_QUEUE 64 + + +/** Largest permissible queue identifier. + * @ingroup setup */ +#define NETIO_MAX_QUEUE_ID 255 + + +#ifndef __DOXYGEN__ + +/* Metadata packet checksum/ethertype flags. */ + +/** The L4 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 +#define _NETIO_PKT_NO_L4_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L4_CSUM_MASK \ + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) + +/** The L3 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 +#define _NETIO_PKT_NO_L3_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L3_CSUM_MASK \ + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) + +/** The L3 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 +#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 +#define _NETIO_PKT_BAD_L3_CSUM_MASK \ + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) + +/** The Ethernet packet type is unrecognized. */ +#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) + +/* Metadata packet type flags. */ + +/** Where the packet type bits are; this field is the index into + * _netio_pkt_info. */ +#define _NETIO_PKT_TYPE_SHIFT 4 +#define _NETIO_PKT_TYPE_RMASK 0x3F + +/** How many VLAN tags the packet has, and, if we have two, which one we + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) + * tag is counted here. */ +#define _NETIO_PKT_VLAN_SHIFT 4 +#define _NETIO_PKT_VLAN_RMASK 0x3 +#define _NETIO_PKT_VLAN_MASK \ + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) +#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ +#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ +#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ +#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ + +/** Which proprietary tags the packet has. */ +#define _NETIO_PKT_TAG_SHIFT 6 +#define _NETIO_PKT_TAG_RMASK 0x3 +#define _NETIO_PKT_TAG_MASK \ + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) +#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ +#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ +#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ +#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ + +/** Whether a packet has an LLC + SNAP header. */ +#define _NETIO_PKT_SNAP_SHIFT 8 +#define _NETIO_PKT_SNAP_RMASK 0x1 +#define _NETIO_PKT_SNAP_MASK \ + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) + +/* NOTE: Bits 9 and 10 are unused. */ + +/** Length of any custom data before the L2 header, in words. */ +#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 +#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F +#define _NETIO_PKT_CUSTOM_LEN_MASK \ + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) + +/** The L4 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 +#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 +#define _NETIO_PKT_BAD_L4_CSUM_MASK \ + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) + +/** Length of the L2 header, in words. */ +#define _NETIO_PKT_L2_LEN_SHIFT 17 +#define _NETIO_PKT_L2_LEN_RMASK 0x1F +#define _NETIO_PKT_L2_LEN_MASK \ + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) + + +/* Flags in minimal packet metadata. */ + +/** We need an eDMA checksum on this packet. */ +#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 +#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 +#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) + +/* Data within the packet information table. */ + +/* Note that, for efficiency, code which uses these fields assumes that none + * of the shift values below are zero. See uses below for an explanation. */ + +/** Offset within the L2 header of the innermost ethertype (in halfwords). */ +#define _NETIO_PKT_INFO_ETYPE_SHIFT 6 +#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F + +/** Offset within the L2 header of the VLAN tag (in halfwords). */ +#define _NETIO_PKT_INFO_VLAN_SHIFT 11 +#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F + +#endif + + +/** The size of a memory buffer representing a small packet. + * @ingroup egress */ +#define SMALL_PACKET_SIZE 256 + +/** The size of a memory buffer representing a large packet. + * @ingroup egress */ +#define LARGE_PACKET_SIZE 2048 + +/** The size of a memory buffer representing a jumbo packet. + * @ingroup egress */ +#define JUMBO_PACKET_SIZE (12 * 1024) + + +/* Common ethertypes. + * @ingroup ingress */ +/** @{ */ +/** The ethertype of IPv4. */ +#define ETHERTYPE_IPv4 (0x0800) +/** The ethertype of ARP. */ +#define ETHERTYPE_ARP (0x0806) +/** The ethertype of VLANs. */ +#define ETHERTYPE_VLAN (0x8100) +/** The ethertype of a Q-in-Q header. */ +#define ETHERTYPE_Q_IN_Q (0x9100) +/** The ethertype of IPv6. */ +#define ETHERTYPE_IPv6 (0x86DD) +/** The ethertype of MPLS. */ +#define ETHERTYPE_MPLS (0x8847) +/** @} */ + + +/** The possible return values of NETIO_PKT_STATUS. + * @ingroup ingress + */ +typedef enum +{ + /** No problems were detected with this packet. */ + NETIO_PKT_STATUS_OK, + /** The packet is undersized; this is expected behavior if the packet's + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ + NETIO_PKT_STATUS_UNDERSIZE, + /** The packet is oversized and some trailing bytes have been discarded. + This is expected behavior for short packets, since it's impossible to + precisely determine the amount of padding which may have been added to + them to make them meet the minimum Ethernet packet size. */ + NETIO_PKT_STATUS_OVERSIZE, + /** The packet was judged to be corrupt by hardware (for instance, it had + a bad CRC, or part of it was discarded due to lack of buffer space in + the I/O shim) and should be discarded. */ + NETIO_PKT_STATUS_BAD +} netio_pkt_status_t; + + +/** Log2 of how many buckets we have. */ +#define NETIO_LOG2_NUM_BUCKETS (10) + +/** How many buckets we have. + * @ingroup ingress */ +#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) + + +/** + * @brief A group-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given group. + */ +typedef union { + /** The header broken down into bits. */ + struct { + /** Whether we should balance on L4, if available */ + unsigned int __balance_on_l4:1; + /** Whether we should balance on L3, if available */ + unsigned int __balance_on_l3:1; + /** Whether we should balance on L2, if available */ + unsigned int __balance_on_l2:1; + /** Reserved for future use */ + unsigned int __reserved:1; + /** The base bucket to use to send traffic */ + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; + /** The mask to apply to the balancing value. This must be one less + * than a power of two, e.g. 0x3 or 0xFF. + */ + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; + /** Pad to 32 bits */ + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); + } bits; + /** To send out the IDN. */ + unsigned int word; +} +netio_group_t; + + +/** + * @brief A VLAN-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given VLAN. + */ +typedef netio_group_t netio_vlan_t; + + +/** + * A bucket-to-queue mapping. + * @ingroup setup + */ +typedef unsigned char netio_bucket_t; + + +/** + * A packet size can always fit in a netio_size_t. + * @ingroup setup + */ +typedef unsigned int netio_size_t; + + +/** + * @brief Ethernet standard (ingress) packet metadata. + * + * @ingroup ingress + * + * This is additional data associated with each packet. + * This structure is opaque and accessed through the @ref ingress. + * + * Also, the buffer population operation currently assumes that standard + * metadata is at least as large as minimal metadata, and will need to be + * modified if that is no longer the case. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[24]; +#else + /** The overall ordinal of the packet */ + unsigned int __packet_ordinal; + /** The ordinal of the packet within the group */ + unsigned int __group_ordinal; + /** The best flow hash IPP could compute. */ + unsigned int __flow_hash; + /** Flags pertaining to checksum calculation, packet type, etc. */ + unsigned int __flags; + /** The first word of "user data". */ + unsigned int __user_data_0; + /** The second word of "user data". */ + unsigned int __user_data_1; +#endif +} +netio_pkt_metadata_t; + + +/** To ensure that the L3 header is aligned mod 4, the L2 header should be + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes + * long. The standard way to do this is to simply add 2 bytes of padding + * before the L2 header. + */ +#define NETIO_PACKET_PADDING 2 + + + +/** + * @brief Ethernet minimal (egress) packet metadata. + * + * @ingroup egress + * + * This structure represents information about packets which have + * been processed by @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer(). This structure is opaque + * and accessed through the @ref egress. + * + * @internal This structure is actually copied into the memory used by + * standard metadata, which is assumed to be large enough. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[14]; +#else + /** The offset of the L2 header from the start of the packet data. */ + unsigned short l2_offset; + /** The offset of the L3 header from the start of the packet data. */ + unsigned short l3_offset; + /** Where to write the checksum. */ + unsigned char csum_location; + /** Where to start checksumming from. */ + unsigned char csum_start; + /** Flags pertaining to checksum calculation etc. */ + unsigned short flags; + /** The L2 length of the packet. */ + unsigned short l2_length; + /** The checksum with which to seed the checksum generator. */ + unsigned short csum_seed; + /** How much to checksum. */ + unsigned short csum_length; +#endif +} +netio_pkt_minimal_metadata_t; + + +#ifndef __DOXYGEN__ + +/** + * @brief An I/O notification header. + * + * This is the first word of data received from an I/O shim in a notification + * packet. It contains framing and status information. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + unsigned int __channel:7; /**< Resource channel. */ + unsigned int __type:4; /**< Type. */ + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ + unsigned int __reserved:1; /**< Reserved. */ + unsigned int __protocol:1; /**< A protocol-specific word is added. */ + unsigned int __status:2; /**< Status of the transfer. */ + unsigned int __framing:2; /**< Framing of the transfer. */ + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ + } bits; +} +__netio_pkt_notif_t; + + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_HANDLE_BASE(p) \ + ((unsigned char*)((p).word & 0xFFFFFFC0)) + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_BASE(p) \ + _NETIO_PKT_HANDLE_BASE(p->__packet) + +/** + * @brief An I/O notification packet (second word) + * + * This is the second word of data received from an I/O shim in a notification + * packet. This is the virtual address of the packet buffer, plus some flag + * bits. (The virtual address of the packet is always 256-byte aligned so we + * have room for 8 bits' worth of flags in the low 8 bits.) + * + * @internal + * NOTE: The low two bits must contain "__queue", so the "packet size" + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. + * + * If __addr or __offset are moved, _NETIO_PKT_BASE + * (defined right below this) must be changed. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + /** Which queue the packet will be returned to once it is sent back to + the IPP. This is one of the SIZE_xxx values. */ + unsigned int __queue:2; + + /** The IPP handle of the sending IPP. */ + unsigned int __ipp_handle:2; + + /** Reserved for future use. */ + unsigned int __reserved:1; + + /** If 1, this packet has minimal (egress) metadata; otherwise, it + has standard (ingress) metadata. */ + unsigned int __minimal:1; + + /** Offset of the metadata within the packet. This value is multiplied + * by 64 and added to the base packet address to get the metadata + * address. Note that this field is aligned within the word such that + * you can easily extract the metadata address with a 26-bit mask. */ + unsigned int __offset:2; + + /** The top 24 bits of the packet's virtual address. */ + unsigned int __addr:24; + } bits; +} +__netio_pkt_handle_t; + +#endif /* !__DOXYGEN__ */ + + +/** + * @brief A handle for an I/O packet's storage. + * @ingroup ingress + * + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its + * packet metadata removed. It is a much smaller type that exists to + * facilitate applications where the full ::netio_pkt_t type is too + * large, such as those that cache enormous numbers of packets or wish + * to transmit packet descriptors over the UDN. + * + * Because there is no metadata, most ::netio_pkt_t operations cannot be + * performed on a netio_pkt_handle_t. It supports only + * netio_free_handle() (to free the buffer) and + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). + * The application must acquire any additional metadata it wants from the + * original ::netio_pkt_t and record it separately. + * + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). + */ +typedef struct +{ + unsigned int word; /**< Opaque bits. */ +} netio_pkt_handle_t; + +/** + * @brief A packet descriptor. + * + * @ingroup ingress + * @ingroup egress + * + * This data structure represents a packet. The structure is manipulated + * through the @ref ingress and the @ref egress. + * + * While the contents of a netio_pkt_t are opaque, the structure itself is + * portable. This means that it may be shared between all tiles which have + * done a netio_input_register() call for the interface on which the pkt_t + * was initially received (via netio_get_packet()) or retrieved (via + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to + * another tile via shared memory, or via a UDN message, or by other means. + * The destination tile may then use the pkt_t as if it had originally been + * received locally; it may read or write the packet's data, read its + * metadata, free the packet, send the packet, transfer the netio_pkt_t to + * yet another tile, and so forth. + * + * Once a netio_pkt_t has been transferred to a second tile, the first tile + * should not reference the original copy; in particular, if more than one + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will + * become corrupted. Note also that each tile which reads or modifies + * packet data must obey the memory coherency rules outlined in @ref input. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[32]; +#else + /** For an ingress packet (one with standard metadata), this is the + * notification header we got from the I/O shim. For an egress packet + * (one with minimal metadata), this word is zero if the packet has not + * been populated, and nonzero if it has. */ + __netio_pkt_notif_t __notif_header; + + /** Virtual address of the packet buffer, plus state flags. */ + __netio_pkt_handle_t __packet; + + /** Metadata associated with the packet. */ + netio_pkt_metadata_t __metadata; +#endif +} +netio_pkt_t; + + +#ifndef __DOXYGEN__ + +#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) +#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) + +/* Packet information table, used by the attribute access functions below. */ +extern const uint16_t _netio_pkt_info[]; + +#endif /* __DOXYGEN__ */ + + +#ifndef __DOXYGEN__ +/* These macros are deprecated and will disappear in a future MDE release. */ +#define NETIO_PKT_GOOD_CHECKSUM(pkt) \ + NETIO_PKT_L4_CSUM_CORRECT(pkt) +#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) +#endif /* __DOXYGEN__ */ + + +/* Packet attribute access functions. */ + +/** Return a pointer to the metadata for a packet. + * @ingroup ingress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_M" suffix usually improves performance. This + * function must be called on an 'ingress' packet (i.e. one retrieved + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer have not been called). Use of this + * function on an 'egress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_metadata_t* +NETIO_PKT_METADATA(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + return &pkt->__metadata; +} + + +/** Return a pointer to the minimal metadata for a packet. + * @ingroup egress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_MM" suffix usually improves performance. This + * function must be called on an 'egress' packet (i.e. one on which + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() + * have been called, or one retrieved by @ref netio_get_buffer()). Use of + * this function on an 'ingress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_minimal_metadata_t* +NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) +{ + netio_assert(pkt->__packet.bits.__minimal); + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; +} + + +/** Determine whether a packet has 'minimal' metadata. + * @ingroup pktfuncs + * + * This function will return nonzero if the packet is an 'egress' + * packet (i.e. one on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have been called, or one + * retrieved by @ref netio_get_buffer()), and zero if the packet + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), + * which has not been converted into an 'egress' packet). + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet has minimal metadata. + */ +static __inline unsigned int +NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) +{ + return pkt->__packet.bits.__minimal; +} + + +/** Return a handle for a packet's storage. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A handle for the packet's storage. + */ +static __inline netio_pkt_handle_t +NETIO_PKT_HANDLE(netio_pkt_t* pkt) +{ + netio_pkt_handle_t h; + h.word = pkt->__packet.word; + return h; +} + + +/** A special reserved value indicating the absence of a packet handle. + * + * @ingroup pktfuncs + */ +#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) + + +/** Test whether a packet handle is valid. + * + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE + * to indicate no packet at all. This function tests to see if a packet + * handle is a real handle, not this special reserved value. + * + * @ingroup pktfuncs + * + * @param[in] handle Handle on which to operate. + * @return One if the packet handle is valid, else zero. + */ +static __inline unsigned int +NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) +{ + return handle.word != 0; +} + + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] handle Handle on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) +{ + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. + */ + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - + NETIO_PACKET_PADDING); +} + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. We then add two bytes. + */ + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the start of the packet's L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, + * the IP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup ingress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_DATA_M(mda, pkt) + + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__packet_ordinal; +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__group_ordinal; +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value will be zero if the packet does not have a VLAN tag, or if + * this value was not extracted from the packet. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; + unsigned short* pkt_p; + int index; + unsigned short val; + + if (vl == _NETIO_PKT_VLAN_NONE) + return 0; + + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & + _NETIO_PKT_INFO_VLAN_RMASK]; + +#ifdef __TILECC__ + return (__insn_bytex(val) >> 16) & 0xFFF; +#else + return (__builtin_bswap32(val) >> 16) & 0xFFF; +#endif +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns true, and otherwise, may not be well defined. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + unsigned short val = + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & + _NETIO_PKT_INFO_ETYPE_RMASK]; + + return __builtin_bswap32(val) >> 16; +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__flow_hash; +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_0; +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_1; +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); +} + + +/** Determine whether the ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this + * function. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return mmd->l2_length; +} + + +/** Return the length of the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ + return mmd->l3_offset - mmd->l2_offset; +} + + +/** Return the length of the packet, starting with the L3 (IP) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup egress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this + * function. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_BAD_M(mda, pkt); +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); +} + + +/** Return a pointer to the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); + } +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_DATA_M(mda, pkt); + } +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, the IP) + * header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup pktfuncs + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_DATA_M(mda, pkt); + } +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ORDINAL_M(mda, pkt); +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This is usually also contained within the packet header. If the packet + * does not have a VLAN tag, the VLAN ID returned by this function is zero. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_VLAN_ID_M(mda, pkt); +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns true, and otherwise, may not be well defined. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_M(mda, pkt); +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_FLOW_HASH_M(mda, pkt); +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_0_M(mda, pkt); +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_1_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the Ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the Ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); +} + + +/** Set an egress packet's L2 length, using a metadata pointer to speed the + * computation. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, + int len) +{ + mmd->l2_length = len; +} + + +/** Set an egress packet's L2 length. + * @ingroup egress + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); +} + + +/** Set an egress packet's L2 header length, using a metadata pointer to + * speed the computation. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int len) +{ + mmd->l3_offset = mmd->l2_offset + len; +} + + +/** Set an egress packet's L2 header length. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); +} + + +/** Set up an egress packet for hardware checksum computation, using a + * metadata pointer to speed the operation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + mmd->csum_start = start; + mmd->csum_length = length; + mmd->csum_location = location; + mmd->csum_seed = seed; + mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; +} + + +/** Set up an egress packet for hardware checksum computation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * + * @param[in,out] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; +} + + +/** Return the number of bytes which could be prepended to a packet. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); + } +} + + +/** Flush a packet's minimal metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) +{ +} + +/** Number of NUMA nodes we can distribute buffers to. + * @ingroup setup */ +#define NETIO_NUM_NODE_WEIGHTS 16 + +/** + * @brief An object for specifying the characteristics of NetIO communication + * endpoint. + * + * @ingroup setup + * + * The @ref netio_input_register() function uses this structure to define + * how an application tile will communicate with an IPP. + * + * + * Future updates to NetIO may add new members to this structure, + * which can affect the success of the registration operation. Thus, + * if dynamically initializing the structure, applications are urged to + * zero it out first, for example: + * + * @code + * netio_input_config_t config; + * memset(&config, 0, sizeof (config)); + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; + * config.queue_id = 0; + * . + * . + * . + * @endcode + * + * since that guarantees that any unused structure members, including + * members which did not exist when the application was first developed, + * will not have unexpected values. + * + * If statically initializing the structure, we strongly recommend use of + * C99-style named initializers, for example: + * + * @code + * netio_input_config_t config = { + * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, + * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, + * .queue_id = 0, + * }, + * @endcode + * + * instead of the old-style structure initialization: + * + * @code + * // Bad example! Currently equivalent to the above, but don't do this. + * netio_input_config_t config = { + * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 + * }, + * @endcode + * + * since the C99 style requires no changes to the code if elements of the + * config structure are rearranged. (It also makes the initialization much + * easier to understand.) + * + * Except for items which address a particular tile's transmit or receive + * characteristics, such as the ::NETIO_RECV flag, applications are advised + * to specify the same set of configuration data on all registrations. + * This prevents differing results if multiple tiles happen to do their + * registration operations in a different order on different invocations of + * the application. This is particularly important for things like link + * management flags, and buffer size and homing specifications. + * + * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO + * buffer pool is automatically created and mapped into the application's + * virtual address space at an address chosen by the operating system, + * using the common memory (cmem) facility in the Tilera Multicore + * Components library. The cmem facility allows multiple processes to gain + * access to shared memory which is mapped into each process at an + * identical virtual address. In order for this to work, the processes + * must have a common ancestor, which must create the common memory using + * tmc_cmem_init(). + * + * In programs using the iLib process creation API, or in programs which use + * only one process (which include programs using the pthreads library), + * tmc_cmem_init() is called automatically. All other applications + * must call it explicitly, before any child processes which might call + * netio_input_register() are created. + */ +typedef struct +{ + /** Registration characteristics. + + This value determines several characteristics of the registration; + flags for different types of behavior are ORed together to make the + final flag value. Generally applications should specify exactly + one flag from each of the following categories: + + - Whether the application will be receiving packets on this queue + (::NETIO_RECV or ::NETIO_NO_RECV). + + - Whether the application will be transmitting packets on this queue, + and if so, whether it will request egress checksum calculation + (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is + legal to call netio_get_buffer() without one of the XMIT flags, + as long as ::NETIO_RECV is specified; in this case, the retrieved + buffers must be passed to another tile for transmission. + + - Whether the application expects any vendor-specific tags in + its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, + or ::NETIO_TAG_MRVL). This must match the configuration of the + target IPP. + + To accommodate applications written to previous versions of the NetIO + interface, none of the flags above are currently required; if omitted, + NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | + ::NETIO_TAG_NONE were used. However, explicit specification of + the relevant flags allows NetIO to do a better job of resource + allocation, allows earlier detection of certain configuration errors, + and may enable advanced features or higher performance in the future, + so their use is strongly recommended. + + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT + is a special case, intended primarily for use by programs which + retrieve network statistics or do link management operations. + When these flags are both specified, the resulting queue may not + be used with NetIO routines other than netio_get(), netio_set(), + and netio_input_unregister(). See @ref link for more information + on link management. + + Other flags are optional; their use is described below. + */ + int flags; + + /** Interface name. This is a string which identifies the specific + Ethernet controller hardware to be used. The format of the string + is a device type and a device index, separated by a slash; so, + the first 10 Gigabit Ethernet controller is named "xgbe/0", while + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". + */ + const char* interface; + + /** Receive packet queue size. This specifies the maximum number + of ingress packets that can be received on this queue without + being retrieved by @ref netio_get_packet(). If the IPP's distribution + algorithm calls for a packet to be sent to this queue, and this + number of packets are already pending there, the new packet + will either be discarded, or sent to another tile registered + for the same queue_id (see @ref drops). This value must + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain + interfaces. + */ + int num_receive_packets; + + /** The queue ID being requested. Legal values for this range from 0 + to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always + greater than or equal to the number of tiles; this allows one queue + for each tile, plus at least one additional queue. Some applications + may wish to use the additional queue as a destination for unwanted + packets, since packets delivered to queues for which no tiles have + registered are discarded. + */ + unsigned int queue_id; + + /** Maximum number of small send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds + empty small egress buffers requested from the IPP but not yet + retrieved via @ref netio_get_buffer(). This value must be greater + than zero if the application will ever use @ref netio_get_buffer() + to allocate empty small egress buffers; it may be no larger than + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty + buffer caching. + */ + int num_send_buffers_small_total; + + /** Number of small send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty small egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_small_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_small_prealloc; + + /** Maximum number of large send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + large egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_large_total; + + /** Number of large send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty large egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_large_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_large_prealloc; + + /** Maximum number of jumbo send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + jumbo egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_total; + + /** Number of jumbo send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty jumbo egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_prealloc; + + /** Total packet buffer size. This determines the total size, in bytes, + of the NetIO buffer pool. Note that the maximum number of available + buffers of each size is determined during hypervisor configuration + (see the System Programmer's Guide for details); this just + influences how much host memory is allocated for those buffers. + + The buffer pool is allocated from common memory, which will be + automatically initialized if needed. If your buffer pool is larger + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), + as described in the Application Libraries Reference Manual (UG227). + + Packet buffers are currently allocated in chunks of 16 MB; this + value will be rounded up to the next larger multiple of 16 MB. + If this value is zero, a default of 32 MB will be used; this was + the value used by previous versions of NetIO. Note that taking this + default also affects the placement of buffers on Linux NUMA nodes. + See @ref buffer_node_weights for an explanation of buffer placement. + + In order to successfully allocate packet buffers, Linux must have + available huge pages on the relevant Linux NUMA nodes. See the + System Programmer's Guide for information on configuring + huge page support in Linux. + */ + uint64_t total_buffer_size; + + /** Buffer placement weighting factors. + + This array specifies the relative amount of buffering to place + on each of the available Linux NUMA nodes. This array is + indexed by the NUMA node, and the values in the array are + proportional to the amount of buffer space to allocate on that + node. + + If memory striping is enabled in the Hypervisor, then there is + only one logical NUMA node (node 0). In that case, NetIO will by + default ignore the suggested buffer node weights, and buffers + will be striped across the physical memory controllers. See + UG209 System Programmer's Guide for a description of the + hypervisor option that controls memory striping. + + If memory striping is disabled, then there are up to four NUMA + nodes, corresponding to the four DDRAM controllers in the TILE + processor architecture. See UG100 Tile Processor Architecture + Overview for a diagram showing the location of each of the DDRAM + controllers relative to the tile array. + + For instance, if memory striping is disabled, the following + configuration strucure: + + @code + netio_input_config_t config = { + . + . + . + .total_buffer_size = 4 * 16 * 1024 * 1024; + .buffer_node_weights = { 1, 0, 1, 0 }, + }, + @endcode + + would result in 32 MB of buffers being placed on controller 0, and + 32 MB on controller 2. (Since buffers are allocated in units of + 16 MB, some sets of weights will not be able to be matched exactly.) + + For the weights to be effective, @ref total_buffer_size must be + nonzero. If @ref total_buffer_size is zero, causing the default + 32 MB of buffer space to be used, then any specified weights will + be ignored, and buffers will positioned as they were in previous + versions of NetIO: + + - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, + and the other 16 MB will be placed on controller 2. + + - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, + and the other 16 MB will be placed on controller 3. + + If @ref total_buffer_size is nonzero, but all weights are zero, + then all buffer space will be allocated on Linux NUMA node zero. + + By default, the specified buffer placement is treated as a hint; + if sufficient free memory is not available on the specified + controllers, the buffers will be allocated elsewhere. However, + if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a + failure to allocate buffer space exactly as requested will cause the + registration operation to fail with an error of ::NETIO_CANNOT_HOME. + + Note that maximal network performance cannot be achieved with + only one memory controller. + */ + uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; + + /** Fixed virtual address for packet buffers. Only valid when + ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the + description of that flag for details. + */ + void* fixed_buffer_va; + + /** + Maximum number of outstanding send packet requests. This value is + only relevant when an EPP is in use; it determines the number of + slots in the EPP's outgoing packet queue which this tile is allowed + to consume, and thus the number of packets which may be sent before + the sending tile must wait for an acknowledgment from the EPP. + Modifying this value is generally only helpful when using @ref + netio_send_packet_vector(), where it can help improve performance by + allowing a single vector send operation to process more packets. + Typically it is not specified, and the default, which divides the + outgoing packet slots evenly between all tiles on the chip, is used. + + If a registration asks for more outgoing packet queue slots than are + available, ::NETIO_TOOMANY_XMIT will be returned. The total number + of packet queue slots which are available for all tiles for each EPP + is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. + + + This value is ignored if ::NETIO_XMIT is not specified in flags. + If you want to specify a large value here for a specific tile, you are + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so + that they do not consume a default number of packet slots. Any tile + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING + slots allocated to it; values less than that will be silently + increased by the NetIO library. + */ + int num_sends_outstanding; +} +netio_input_config_t; + + +/** Registration flags; used in the @ref netio_input_config_t structure. + * @addtogroup setup + */ +/** @{ */ + +/** Fail a registration request if we can't put packet buffers + on the specified memory controllers. */ +#define NETIO_STRICT_HOMING 0x00000002 + +/** This application expects no tags on its L2 headers. */ +#define NETIO_TAG_NONE 0x00000004 + +/** This application expects Marvell extended tags on its L2 headers. */ +#define NETIO_TAG_MRVL 0x00000008 + +/** This application expects Broadcom tags on its L2 headers. */ +#define NETIO_TAG_BRCM 0x00000010 + +/** This registration may call routines which receive packets. */ +#define NETIO_RECV 0x00000020 + +/** This registration may not call routines which receive packets. */ +#define NETIO_NO_RECV 0x00000040 + +/** This registration may call routines which transmit packets. */ +#define NETIO_XMIT 0x00000080 + +/** This registration may call routines which transmit packets with + checksum acceleration. */ +#define NETIO_XMIT_CSUM 0x00000100 + +/** This registration may not call routines which transmit packets. */ +#define NETIO_NO_XMIT 0x00000200 + +/** This registration wants NetIO buffers mapped at an application-specified + virtual address. + + NetIO buffers are by default created by the TMC common memory facility, + which must be configured by a common ancestor of all processes sharing + a network interface. When this flag is specified, NetIO buffers are + instead mapped at an address chosen by the application (and specified + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple + unrelated but cooperating processes to share a NetIO interface. + All processes sharing the same interface must specify this flag, + and all must specify the same fixed virtual address. + + @ref netio_input_config_t::fixed_buffer_va must be a + multiple of 16 MB, and the packet buffers will occupy @ref + netio_input_config_t::total_buffer_size bytes of virtual address + space, beginning at that address. If any of those virtual addresses + are currently occupied by other memory objects, like application or + shared library code or data, @ref netio_input_register() will return + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va + which will work for all applications, a good first guess might be to + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. + If that fails, it might be helpful to consult the running application's + virtual address description file (/proc/pid/maps) to see + which regions of virtual address space are available. + */ +#define NETIO_FIXED_BUFFER_VA 0x00000400 + +/** This registration call will not complete unless the network link + is up. The process will wait several seconds for this to happen (the + precise interval is link-dependent), but if the link does not come up, + ::NETIO_LINK_DOWN will be returned. This flag is the default if + ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by + itself does not request that the link be brought up; that can be done + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), + or by explicitly setting the link's desired state via netio_set(). + If the link is not brought up by one of those methods, and this flag + is specified, the registration operation will return ::NETIO_LINK_DOWN. + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link + management. + */ +#define NETIO_REQUIRE_LINK_UP 0x00000800 + +/** This registration call will complete even if the network link is not up. + Whenever the link is not up, packets will not be sent or received: + netio_get_packet() will return ::NETIO_NOPKT once all queued packets + have been drained, and netio_send_packet() and similar routines will + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP + or the I/O shim is full. See @ref link for more information on link + management. + */ +#define NETIO_NOREQUIRE_LINK_UP 0x00001000 + +#ifndef __DOXYGEN__ +/* + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, + * but should not be used directly by applications, and are thus not + * documented. + */ +#define _NETIO_AUTO_UP 0x00002000 +#define _NETIO_AUTO_DN 0x00004000 +#define _NETIO_AUTO_PRESENT 0x00008000 +#endif + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Do not take down the link automatically. This is the default if + no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ + _NETIO_AUTO_DN) + +/** Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) + +/** Do not bring up the link automatically as part of this registration + operation. Do not take down the link automatically. This flag + is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT + + +/** Minimum number of receive packets. */ +#define NETIO_MIN_RECEIVE_PKTS 16 + +/** Lower bound on the maximum number of receive packets; may be higher + than this on some interfaces. */ +#define NETIO_MAX_RECEIVE_PKTS 128 + +/** Maximum number of send buffers, per packet size. */ +#define NETIO_MAX_SEND_BUFFERS 16 + +/** Number of EPP queue slots, and thus outstanding sends, per EPP. */ +#define NETIO_TOTAL_SENDS_OUTSTANDING 2015 + +/** Minimum number of EPP queue slots, and thus outstanding sends, per + * transmitting tile. */ +#define NETIO_MIN_SENDS_OUTSTANDING 16 + + +/**@}*/ + +#ifndef __DOXYGEN__ + +/** + * An object for providing Ethernet packets to a process. + */ +struct __netio_queue_impl_t; + +/** + * An object for managing the user end of a NetIO queue. + */ +struct __netio_queue_user_impl_t; + +#endif /* !__DOXYGEN__ */ + + +/** A netio_queue_t describes a NetIO communications endpoint. + * @ingroup setup + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[8]; /**< This is an opaque structure. */ +#else + struct __netio_queue_impl_t* __system_part; /**< The system part. */ + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ +#ifdef _NETIO_PTHREAD + _netio_percpu_mutex_t lock; /**< Queue lock. */ +#endif +#endif +} +netio_queue_t; + + +/** + * @brief Packet send context. + * + * @ingroup egress + * + * Packet send context for use with netio_send_packet_prepare and _commit. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[44]; /**< This is an opaque structure. */ +#else + uint8_t flags; /**< Defined below */ + uint8_t datalen; /**< Number of valid words pointed to by data. */ + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note + that this is smaller than the 11-word maximum + request size, since some constant values are + not saved in the context. */ + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ +#endif +} +netio_send_pkt_context_t; + + +#ifndef __DOXYGEN__ +#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ +#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ +#endif + +/** + * @brief Packet vector entry. + * + * @ingroup egress + * + * This data structure is used with netio_send_packet_vector() to send multiple + * packets with one NetIO call. The structure should be initialized by + * calling netio_pkt_vector_set(), rather than by setting the fields + * directly. + * + * This structure is guaranteed to be a power of two in size, no + * bigger than one L2 cache line, and to be aligned modulo its size. + */ +typedef struct +#ifndef __DOXYGEN__ +__attribute__((aligned(8))) +#endif +{ + /** Reserved for use by the user application. When initialized with + * the netio_set_pkt_vector_entry() function, this field is guaranteed + * to be visible to readers only after all other fields are already + * visible. This way it can be used as a valid flag or generation + * counter. */ + uint8_t user_data; + + /* Structure members below this point should not be accessed directly by + * applications, as they may change in the future. */ + + /** Low 8 bits of the packet address to send. The high bits are + * acquired from the 'handle' field. */ + uint8_t buffer_address_low; + + /** Number of bytes to transmit. */ + uint16_t size; + + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, + * this vector entry will be skipped and no packet will be transmitted. */ + netio_pkt_handle_t handle; +} +netio_pkt_vector_entry_t; + + +/** + * @brief Initialize fields in a packet vector entry. + * + * @ingroup egress + * + * @param[out] v Pointer to the vector entry to be initialized. + * @param[in] pkt Packet to be transmitted when the vector entry is passed to + * netio_send_packet_vector(). Note that the packet's attributes + * (e.g., its L2 offset and length) are captured at the time this + * routine is called; subsequent changes in those attributes will not + * be reflected in the packet which is actually transmitted. + * Changes in the packet's contents, however, will be so reflected. + * If this is NULL, no packet will be transmitted. + * @param[in] user_data User data to be set in the vector entry. + * This function guarantees that the "user_data" field will become + * visible to a reader only after all other fields have become visible. + * This allows a structure in a ring buffer to be written and read + * by a polling reader without any locks or other synchronization. + */ +static __inline void +netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, + uint8_t user_data) +{ + if (pkt) + { + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); + } + v->handle.word = pkt->__packet.word; + } + else + { + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ + } + + __asm__("" : : : "memory"); + + v->user_data = user_data; +} + + +/** + * Flags and structures for @ref netio_get() and @ref netio_set(). + * @ingroup config + */ + +/** @{ */ +/** Parameter class; addr is a NETIO_PARAM_xxx value. */ +#define NETIO_PARAM 0 +/** Interface MAC address. This address is only valid with @ref netio_get(). + * The value is a 6-byte MAC address. Depending upon the overall system + * design, a MAC address may or may not be available for each interface. */ +#define NETIO_PARAM_MAC 0 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, the I/O shim will suspend output when a pause + * frame is received. If the value is zero, pause frames will be ignored. */ +#define NETIO_PARAM_PAUSE_IN 1 + +/** Determine whether to send pause frames if the I/O shim packet FIFOs are + * nearly full. If the value is zero, pause frames are not sent. If + * the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. */ +#define NETIO_PARAM_PAUSE_OUT 2 + +/** Jumbo frame support. The value is a 4-byte integer. If the value is + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value + * is zero, the MAC will only accept frames of up to 1544 bytes. */ +#define NETIO_PARAM_JUMBO 3 + +/** I/O shim's overflow statistics register. The value is two 16-bit integers. + * The first 16-bit value (or the low 16 bits, if the value is treated as a + * 32-bit number) is the count of packets which were completely dropped and + * not delivered by the shim. The second 16-bit value (or the high 16 bits, + * if the value is treated as a 32-bit number) is the count of packets + * which were truncated and thus only partially delivered by the shim. This + * register is automatically reset to zero after it has been read. + */ +#define NETIO_PARAM_OVERFLOW 4 + +/** IPP statistics. This address is only valid with @ref netio_get(). The + * value is a netio_stat_t structure. Unlike the I/O shim statistics, the + * IPP statistics are not all reset to zero on read; see the description + * of the netio_stat_t for details. */ +#define NETIO_PARAM_STAT 5 + +/** Possible link state. The value is a combination of "NETIO_LINK_xxx" + * flags. With @ref netio_get(), this will indicate which flags are + * actually supported by the hardware. + * + * For historical reasons, specifying this value to netio_set() will have + * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is + * discouraged. + */ +#define NETIO_PARAM_LINK_POSSIBLE_STATE 6 + +/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. + * With @ref netio_set(), this will attempt to immediately bring up the + * link using whichever of the requested flags are supported by the + * hardware, or take down the link if the flags are zero; if this is + * not possible, an error will be returned. Many programs will want + * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. + * + * For historical reasons, specifying this value to netio_get() will + * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, + * but this usage is discouraged. + */ +#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE + +/** Current link state. This address is only valid with @ref netio_get(). + * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. + * If the link is down, the value ANDed with NETIO_LINK_SPEED will be + * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will + * result in exactly one of the NETIO_LINK_xxx values, indicating the + * current speed. */ +#define NETIO_PARAM_LINK_CURRENT_STATE 7 + +/** Variant symbol for current state, retained for compatibility with + * pre-MDE-2.1 programs. */ +#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE + +/** Packet Coherence protocol. This address is only valid with @ref netio_get(). + * The value is nonzero if the interface is configured for cache-coherent DMA. + */ +#define NETIO_PARAM_COHERENT 8 + +/** Desired link state. The value is a conbination of "NETIO_LINK_xxx" + * flags, which specify the desired state for the link. With @ref + * netio_set(), this will, in the background, attempt to bring up the link + * using whichever of the requested flags are reasonable, or take down the + * link if the flags are zero. The actual link up or down operation may + * happen after this call completes. If the link state changes in the + * future, the system will continue to try to get back to the desired link + * state; for instance, if the link is brought up successfully, and then + * the network cable is disconnected, the link will go down. However, the + * desired state of the link is still up, so if the cable is reconnected, + * the link will be brought up again. + * + * With @ref netio_get(), this will indicate the desired state for the + * link, as set with a previous netio_set() call, or implicitly by a + * netio_input_register() or netio_input_unregister() operation. This may + * not reflect the current state of the link; to get that, use + * ::NETIO_PARAM_LINK_CURRENT_STATE. */ +#define NETIO_PARAM_LINK_DESIRED_STATE 9 + +/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT + * address passed to @ref netio_get(). */ +typedef struct +{ + /** Number of packets which have been received by the IPP and forwarded + * to a tile's receive queue for processing. This value wraps at its + * maximum, and is not cleared upon read. */ + uint32_t packets_received; + + /** Number of packets which have been dropped by the IPP, because they could + * not be received, or could not be forwarded to a tile. The former happens + * when the IPP does not have a free packet buffer of suitable size for an + * incoming frame. The latter happens when all potential destination tiles + * for a packet, as defined by the group, bucket, and queue configuration, + * have full receive queues. This value wraps at its maximum, and is not + * cleared upon read. */ + uint32_t packets_dropped; + + /* + * Note: the #defines after each of the following four one-byte values + * denote their location within the third word of the netio_stat_t. They + * are intended for use only by the IPP implementation and are thus omitted + * from the Doxygen output. + */ + + /** Number of packets dropped because no worker was able to accept a new + * packet. This value saturates at its maximum, and is cleared upon + * read. */ + uint8_t drops_no_worker; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_WORKER 0 +#endif + + /** Number of packets dropped because no small buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_smallbuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_SMALLBUF 1 +#endif + + /** Number of packets dropped because no large buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_largebuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_LARGEBUF 2 +#endif + + /** Number of packets dropped because no jumbo buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_jumbobuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_JUMBOBUF 3 +#endif +} +netio_stat_t; + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define NETIO_LINK_10M 0x01 + +/** Link can run, should run, or is running at 100 Mbps. */ +#define NETIO_LINK_100M 0x02 + +/** Link can run, should run, or is running at 1 Gbps. */ +#define NETIO_LINK_1G 0x04 + +/** Link can run, should run, or is running at 10 Gbps. */ +#define NETIO_LINK_10G 0x08 + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define NETIO_LINK_ANYSPEED 0x10 + +/** All legal link speeds. */ +#define NETIO_LINK_SPEED (NETIO_LINK_10M | \ + NETIO_LINK_100M | \ + NETIO_LINK_1G | \ + NETIO_LINK_10G | \ + NETIO_LINK_ANYSPEED) + + +/** MAC register class. Addr is a register offset within the MAC. + * Registers within the XGbE and GbE MACs are documented in the Tile + * Processor I/O Device Guide (UG104). MAC registers start at address + * 0x4000, and do not include the MAC_INTERFACE registers. */ +#define NETIO_MAC 1 + +/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO 2 + +/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO_CLAUSE45 3 + +/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO + * address passed to @ref netio_get() or @ref netio_set(). */ +typedef union +{ + struct + { + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, + must be less than 32. */ + unsigned int phy:5; /**< Which MDIO PHY to access. */ + unsigned int dev:5; /**< Which MDIO device to access within that PHY. + Applicable for clause 45 access only; ignored + for clause 22 access. */ + } + bits; /**< Container for bitfields. */ + uint64_t addr; /**< Value to pass to @ref netio_get() or + * @ref netio_set(). */ +} +netio_mdio_addr_t; + +/** @} */ + +#endif /* __NETIO_INTF_H__ */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 78e1982cb6c9..0b9ce69b0ee5 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -988,8 +988,12 @@ static long __write_once initfree = 1; /* Select whether to free (1) or mark unusable (0) the __init pages. */ static int __init set_initfree(char *str) { - strict_strtol(str, 0, &initfree); - pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); + long val; + if (strict_strtol(str, 0, &val)) { + initfree = val; + pr_info("initfree: %s free init pages\n", + initfree ? "will" : "won't"); + } return 1; } __setup("initfree=", set_initfree); -- cgit v1.2.3 From f02cbbe657939489347cbda598401a56913ffcbd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 2 Nov 2010 12:05:10 -0400 Subject: pci root complex: support for tile architecture This change enables PCI root complex support for TILEPro. Unlike TILE-Gx, TILEPro has no support for memory-mapped I/O, so the PCI support consists of hypervisor upcalls for PIO, DMA, etc. However, the performance is fine for the devices we have tested with so far (1Gb Ethernet, SATA, etc.). The header was tweaked to be a little bit more aggressive about disabling attempts to map/unmap IO port space. The hacky header was rolled into the header and the result was simplified. Both of the latter two headers were preliminary versions not meant for release before now - oh well. There is one quirk for our TILEmpower platform, which accidentally negotiates up to 5GT and needs to be kicked down to 2.5GT. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 12 + arch/tile/include/asm/io.h | 15 +- arch/tile/include/asm/pci-bridge.h | 117 ------- arch/tile/include/asm/pci.h | 107 +++---- arch/tile/kernel/Makefile | 1 + arch/tile/kernel/pci.c | 621 +++++++++++++++++++++++++++++++++++++ 6 files changed, 686 insertions(+), 187 deletions(-) delete mode 100644 arch/tile/include/asm/pci-bridge.h create mode 100644 arch/tile/kernel/pci.c (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 07ec8a865c1d..e11b5fcb70eb 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration menu "Bus options" +config PCI + bool "PCI support" + default y + select PCI_DOMAINS + ---help--- + Enable PCI root complex support, so PCIe endpoint devices can + be attached to the Tile chip. Many, but not all, PCI devices + are supported under Tilera's root complex driver. + +config PCI_DOMAINS + bool + config NO_IOMEM def_bool !PCI diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index ee43328713ab..d3cbb9b14cbe 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr); #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) -void __iomem *ioport_map(unsigned long port, unsigned int len); -extern inline void ioport_unmap(void __iomem *addr) {} - #define mmiowb() /* Conversion between virtual and physical mappings. */ @@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, * we never run, uses them unconditionally. */ -static inline int ioport_panic(void) +static inline long ioport_panic(void) { panic("inb/outb and friends do not exist on tile"); return 0; } +static inline void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) ioport_panic(); +} + +static inline void ioport_unmap(void __iomem *addr) +{ + ioport_panic(); +} + static inline u8 inb(unsigned long addr) { return ioport_panic(); diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h deleted file mode 100644 index e853b0e2793b..000000000000 --- a/arch/tile/include/asm/pci-bridge.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_PCI_BRIDGE_H -#define _ASM_TILE_PCI_BRIDGE_H - -#include -#include - -struct device_node; -struct pci_controller; - -/* - * pci_io_base returns the memory address at which you can access - * the I/O space for PCI bus number `bus' (or NULL on error). - */ -extern void __iomem *pci_bus_io_base(unsigned int bus); -extern unsigned long pci_bus_io_base_phys(unsigned int bus); -extern unsigned long pci_bus_mem_base_phys(unsigned int bus); - -/* Allocate a new PCI host bridge structure */ -extern struct pci_controller *pcibios_alloc_controller(void); - -/* Helper function for setting up resources */ -extern void pci_init_resource(struct resource *res, unsigned long start, - unsigned long end, int flags, char *name); - -/* Get the PCI host controller for a bus */ -extern struct pci_controller *pci_bus_to_hose(int bus); - -/* - * Structure of a PCI controller (host bridge) - */ -struct pci_controller { - int index; /* PCI domain number */ - struct pci_bus *root_bus; - - int first_busno; - int last_busno; - - int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ - int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ - - struct pci_ops *ops; - - int irq_base; /* Base IRQ from the Hypervisor */ - int plx_gen1; /* flag for PLX Gen 1 configuration */ - - /* Address ranges that are routed to this controller/bridge. */ - struct resource mem_resources[3]; -}; - -static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus) -{ - return bus->sysdata; -} - -extern void setup_indirect_pci_nomap(struct pci_controller *hose, - void __iomem *cfg_addr, void __iomem *cfg_data); -extern void setup_indirect_pci(struct pci_controller *hose, - u32 cfg_addr, u32 cfg_data); -extern void setup_grackle(struct pci_controller *hose); - -extern unsigned char common_swizzle(struct pci_dev *, unsigned char *); - -/* - * The following code swizzles for exactly one bridge. The routine - * common_swizzle below handles multiple bridges. But there are a - * some boards that don't follow the PCI spec's suggestion so we - * break this piece out separately. - */ -static inline unsigned char bridge_swizzle(unsigned char pin, - unsigned char idsel) -{ - return (((pin-1) + idsel) % 4) + 1; -} - -/* - * The following macro is used to lookup irqs in a standard table - * format for those PPC systems that do not already have PCI - * interrupts properly routed. - */ -/* FIXME - double check this */ -#define PCI_IRQ_TABLE_LOOKUP ({ \ - long _ctl_ = -1; \ - if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \ - _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \ - _ctl_; \ -}) - -/* - * Scan the buses below a given PCI host bridge and assign suitable - * resources to all devices found. - */ -extern int pciauto_bus_scan(struct pci_controller *, int); - -#ifdef CONFIG_PCI -extern unsigned long pci_address_to_pio(phys_addr_t address); -#else -static inline unsigned long pci_address_to_pio(phys_addr_t address) -{ - return (unsigned long)-1; -} -#endif - -#endif /* _ASM_TILE_PCI_BRIDGE_H */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index b0c15da2d5d5..c3fc458a0d32 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,7 +15,29 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H -#include +#include + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + int first_busno; + int last_busno; + + int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ + int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ + + struct pci_ops *ops; + + int irq_base; /* Base IRQ from the Hypervisor */ + int plx_gen1; /* flag for PLX Gen 1 configuration */ + + /* Address ranges that are routed to this controller/bridge. */ + struct resource mem_resources[3]; +}; /* * The hypervisor maps the entirety of CPA-space as bus addresses, so @@ -24,56 +46,12 @@ */ #define PCI_DMA_BUS_IS_PHYS 1 -struct pci_controller *pci_bus_to_hose(int bus); -unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp); int __init tile_pci_init(void); -void pci_iounmap(struct pci_dev *dev, void __iomem *addr); -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); -void __devinit pcibios_fixup_bus(struct pci_bus *bus); -int __devinit _tile_cfg_read(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 *val); -int __devinit _tile_cfg_write(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 val); +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} -/* - * These are used to to config reads and writes in the early stages of - * setup before the driver infrastructure has been set up enough to be - * able to do config reads and writes. - */ -#define early_cfg_read(where, size, value) \ - _tile_cfg_read(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - -#define early_cfg_write(where, size, value) \ - _tile_cfg_write(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - - - -#define PCICFG_BYTE 1 -#define PCICFG_WORD 2 -#define PCICFG_DWORD 4 +void __devinit pcibios_fixup_bus(struct pci_bus *bus); #define TILE_NUM_PCIE 2 @@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus) } /* - * I/O space is currently not supported. + * pcibios_assign_all_busses() tells whether or not the bus numbers + * should be reassigned, in case the BIOS didn't do it correctly, or + * in case we don't have a BIOS and we want to let Linux do it. */ +static inline int pcibios_assign_all_busses(void) +{ + return 1; +} -#define TILE_PCIE_LOWER_IO 0x0 -#define TILE_PCIE_UPPER_IO 0x10000 -#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF - -#define _PAGE_NO_CACHE 0 -#define _PAGE_GUARDED 0 - - -#define pcibios_assign_all_busses() pci_assign_all_buses -extern int pci_assign_all_buses; - +/* + * No special bus mastering setup handling. + */ static inline void pcibios_set_master(struct pci_dev *dev) { - /* No special bus mastering setup handling */ } #define PCIBIOS_MIN_MEM 0 -#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO +#define PCIBIOS_MIN_IO 0 /* * This flag tells if the platform is TILEmpower that needs * special configuration for the PLX switch chip. */ -extern int blade_pci; +extern int tile_plx_gen1; + +/* Use any cpu for PCI. */ +#define cpumask_of_pcibus(bus) cpu_online_mask /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include @@ -122,7 +100,4 @@ extern int blade_pci; /* generic pci stuff */ #include -/* Use any cpu for PCI. */ -#define cpumask_of_pcibus(bus) cpu_online_mask - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 112b1e248f05..b4c8e8ec45dc 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 000000000000..a1ee25be9ad9 --- /dev/null +++ b/arch/tile/kernel/pci.c @@ -0,0 +1,621 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/* + * Initialization flow and process + * ------------------------------- + * + * This files containes the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + * + * There are two entry points here: + * 1) tile_pci_init + * This sets up the pci_controller structs, and opens the + * FDs to the hypervisor. This is called from setup_arch() early + * in the boot process. + * 2) pcibios_init + * This probes the PCI bus(es) for any attached hardware. It's + * called by subsys_initcall. All of the real work is done by the + * generic Linux PCI layer. + * + */ + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +int __write_once tile_plx_gen1; + +static struct pci_controller controllers[TILE_NUM_PCIE]; +static int num_controllers; + +static struct pci_ops tile_cfg_ops; + + +/* + * We don't need to worry about the alignment of resources. + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Open a FD to the hypervisor PCI device. + * + * controller_id is the controller number, config type is 0 or 1 for + * config0 or config1 operations. + */ +static int __init tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __init tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. + */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[num_controllers]; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize " + "IRQs, aborting.\n"); + goto err_cont; + } + + controller->index = num_controllers; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __init fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. */ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +static int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. + * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < num_controllers; i++) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* Find the PCI host controller, ie. the 1st bridge. */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + break; + } + } + + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* + * No bus fixups needed. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Nothing needs to be done. */ +} + +/* + * This can be called from the generic PCI layer, but doesn't need to + * do anything. + */ +char __devinit *pcibios_setup(char *str) +{ + /* Nothing needs to be done. */ + return str; +} + +/* + * This is called from the generic Linux layer. + */ +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* + * Enable memory and/or address decoding, as appropriate, for the + * device described by the 'dev' struct. + * + * This is called from the generic PCI layer, and can be called + * for bridges or endpoints. + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + u8 header_type; + int i; + struct resource *r; + + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* + * For bridges, we enable both memory and I/O decoding + * in call cases. + */ + cmd |= PCI_COMMAND_IO; + cmd |= PCI_COMMAND_MEMORY; + } else { + /* + * For endpoints, we enable memory and/or I/O decoding + * only if they have a memory resource of that type. + */ + for (i = 0; i < 6; i++) { + r = &dev->resource[i]; + if (r->flags & IORESOURCE_UNSET) { + pr_err("PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + } + + /* + * We only write the command if it changed. + */ + if (cmd != old_cmd) + pci_write_config_word(dev, PCI_COMMAND, cmd); + return 0; +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + + if (!(flags & IORESOURCE_MEM)) { + pr_info("PCI: Trying to map invalid resource %#lx\n", flags); + start = 0; + } + + return (void __iomem *)start; +} +EXPORT_SYMBOL(pci_iomap); + + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc. + * + * devfn is the combined PCI slot & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & slot. + */ + +static int __devinit tile_cfg_read(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + + /* + * There is no bridge between the Tile and bus 0, so we + * use config0 to talk to bus 0. + * + * If we're talking to a bus other than zero then we + * must have found a bridge. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) { + *val = 0xFFFFFFFF; + return 0; + } + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + + return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, + (HV_VirtAddr)(val), size, addr); +} + + +/* + * See tile_cfg_read() for relevent comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int __devinit tile_cfg_write(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + HV_VirtAddr valp = (HV_VirtAddr)&val; + + /* + * For bus 0 slot 0 we use config 0 accesses. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) + return 0; + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + +#ifdef __BIG_ENDIAN + /* Point to the correct part of the 32-bit "val". */ + valp += 4 - size; +#endif + + return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, + valp, size, addr); +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* + * In the following, each PCI controller's mem_resources[1] + * represents its (non-prefetchable) PCI memory resource. + * mem_resources[0] and mem_resources[2] refer to its PCI I/O and + * prefetchable PCI memory resources, respectively. + * For more details, see pci_setup_bridge() in setup-bus.c. + * By comparing the target PCI memory address against the + * end address of controller 0, we can determine the controller + * that should accept the PCI memory access. + */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); -- cgit v1.2.3 From 4d658d13c90f14cf3510ca15cafe2f4aa9e23d64 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Nov 2010 13:42:15 -0500 Subject: arch/tile: make glibc's sysconf(_SC_NPROCESSORS_CONF) work correctly glibc assumes that it can count /sys/devices/system/cpu/cpu* to get the number of configured cpus. For this to be valid on tile, we need to generate a "cpu" entry for all cpus, including the ones that are not currently allocated for Linux's use. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index fb0b3cbeae14..f18573643ed1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -840,7 +840,7 @@ static int __init topology_init(void) for_each_online_node(i) register_one_node(i); - for_each_present_cpu(i) + for (i = 0; i < smp_height * smp_width; ++i) register_cpu(&cpu_devices[i], i); return 0; -- cgit v1.2.3 From 3edabee2ed22ee4f98f4b4bb38a41059226a8446 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Nov 2010 13:57:42 -0500 Subject: arch/tile: fix memchr() not to dereference memory for zero length This change fixes a bug that memchr() will read the first word of the source even if the length is zero. Ironically, the code was originally written with a test to avoid exactly this problem, but to make the code conform to Linux coding standards with all declarations preceding all statements, the first load from memory was moved up above that test as the initial value for a variable. The change just moves all the variable declarations to the top of the file, with no initializers, so that the test can also be at the top of the file. Signed-off-by: Chris Metcalf --- arch/tile/lib/memchr_32.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c index 6235283b4859..cc3d9badf030 100644 --- a/arch/tile/lib/memchr_32.c +++ b/arch/tile/lib/memchr_32.c @@ -18,12 +18,24 @@ void *memchr(const void *s, int c, size_t n) { + const uint32_t *last_word_ptr; + const uint32_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint32_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + /* Get an aligned pointer. */ - const uintptr_t s_int = (uintptr_t) s; - const uint32_t *p = (const uint32_t *)(s_int & -4); + s_int = (uintptr_t) s; + p = (const uint32_t *)(s_int & -4); /* Create four copies of the byte for which we are looking. */ - const uint32_t goal = 0x01010101 * (uint8_t) c; + goal = 0x01010101 * (uint8_t) c; /* Read the first word, but munge it so that bytes before the array * will not match goal. @@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n) * Note that this shift count expression works because we know * shift counts are taken mod 32. */ - const uint32_t before_mask = (1 << (s_int << 3)) - 1; - uint32_t v = (*p | before_mask) ^ (goal & before_mask); + before_mask = (1 << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); /* Compute the address of the last byte. */ - const char *const last_byte_ptr = (const char *)s + n - 1; + last_byte_ptr = (const char *)s + n - 1; /* Compute the address of the word containing the last byte. */ - const uint32_t *const last_word_ptr = - (const uint32_t *)((uintptr_t) last_byte_ptr & -4); - - uint32_t bits; - char *ret; - - if (__builtin_expect(n == 0, 0)) { - /* Don't dereference any memory if the array is empty. */ - return NULL; - } + last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4); while ((bits = __insn_seqb(v, goal)) == 0) { if (__builtin_expect(p == last_word_ptr, 0)) { -- cgit v1.2.3 From 0e91ec0c06d2cd15071a6021c94840a50e6671aa Mon Sep 17 00:00:00 2001 From: James Jones Date: Wed, 24 Nov 2010 00:21:37 +0100 Subject: ARM: 6482/2: Fix find_next_zero_bit and related assembly The find_next_bit, find_first_bit, find_next_zero_bit and find_first_zero_bit functions were not properly clamping to the maxbit argument at the bit level. They were instead only checking maxbit at the byte level. To fix this, add a compare and a conditional move instruction to the end of the common bit-within-the- byte code used by all the functions and be sure not to clobber the maxbit argument before it is used. Cc: Reviewed-by: Nicolas Pitre Tested-by: Stephen Warren Signed-off-by: James Jones Signed-off-by: Russell King --- arch/arm/lib/findbit.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 1e4cbd4e7be9..64f6bc1a9132 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -174,8 +174,8 @@ ENDPROC(_find_next_bit_be) */ .L_found: #if __LINUX_ARM_ARCH__ >= 5 - rsb r1, r3, #0 - and r3, r3, r1 + rsb r0, r3, #0 + and r3, r3, r0 clz r3, r3 rsb r3, r3, #31 add r0, r2, r3 @@ -190,5 +190,7 @@ ENDPROC(_find_next_bit_be) addeq r2, r2, #1 mov r0, r2 #endif + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 mov pc, lr -- cgit v1.2.3 From 69e83dad5207f8f03c9699e57e1febb114383cb8 Mon Sep 17 00:00:00 2001 From: Will Newton Date: Wed, 24 Nov 2010 12:56:55 -0800 Subject: uml: disable winch irq before freeing handler data Disable the winch irq early to make sure we don't take an interrupt part way through the freeing of the handler data, resulting in a crash on shutdown: winch_interrupt : read failed, errno = 9 fd 13 is losing SIGWINCH support ------------[ cut here ]------------ WARNING: at lib/list_debug.c:48 list_del+0xc6/0x100() list_del corruption, next is LIST_POISON1 (00100100) 082578c8: [<081fd77f>] dump_stack+0x22/0x24 082578e0: [<0807a18a>] warn_slowpath_common+0x5a/0x80 08257908: [<0807a23e>] warn_slowpath_fmt+0x2e/0x30 08257920: [<08172196>] list_del+0xc6/0x100 08257940: [<08060244>] free_winch+0x14/0x80 08257958: [<080606fb>] winch_interrupt+0xdb/0xe0 08257978: [<080a65b5>] handle_IRQ_event+0x35/0xe0 08257998: [<080a8717>] handle_edge_irq+0xb7/0x170 082579bc: [<08059bc4>] do_IRQ+0x34/0x50 082579d4: [<08059e1b>] sigio_handler+0x5b/0x80 082579ec: [<0806a374>] sig_handler_common+0x44/0xb0 08257a68: [<0806a538>] sig_handler+0x38/0x50 08257a78: [<0806a77c>] handle_signal+0x5c/0xa0 08257a9c: [<0806be28>] hard_handler+0x18/0x20 08257aac: [<00c14400>] 0xc14400 Signed-off-by: Will Newton Acked-by: WANG Cong Cc: Jeff Dike Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/line.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 7f7338c90784..1664cce7b0ac 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -727,6 +727,9 @@ struct winch { static void free_winch(struct winch *winch, int free_irq_ok) { + if (free_irq_ok) + free_irq(WINCH_IRQ, winch); + list_del(&winch->list); if (winch->pid != -1) @@ -735,8 +738,6 @@ static void free_winch(struct winch *winch, int free_irq_ok) os_close_file(winch->fd); if (winch->stack != 0) free_stack(winch->stack, 0); - if (free_irq_ok) - free_irq(WINCH_IRQ, winch); kfree(winch); } -- cgit v1.2.3 From 91d95fda8594ce5e0ccd81381ee7b956cf513c59 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 24 Nov 2010 12:57:18 -0800 Subject: arch/x86/include/asm/fixmap.h: mark __set_fixmap_offset as __always_inline When compiling arch/x86/kernel/early_printk_mrst.c with i386 allmodconfig, gcc-4.1.0 generates an out-of-line copy of __set_fixmap_offset() which contains a reference to __this_fixmap_does_not_exist which the compiler cannot elide. Marking __set_fixmap_offset() as __always_inline prevents this. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Feng Tang Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/fixmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4d293dced62f..9479a037419f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) } /* Return an pointer with offset calculated */ -static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) +static __always_inline unsigned long +__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { __set_fixmap(idx, phys, flags); return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); -- cgit v1.2.3 From f910043ce00791c06afc3789278447c4e88670ea Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 24 Nov 2010 11:09:03 -0800 Subject: OMAP: UART: don't resume UARTs that are not enabled. Add additional check to omap_uart_resume_idle() so that only enabled (specifically, idle-enabled) UARTs are allowed to resume. This matches the existing check in prepare idle. Without this patch, the system will hang if a board is configured to register only some uarts instead of all of them and PM is enabled. Cc: Govindraj R. Signed-off-by: Kevin Hilman [tony@atomide.com: updated description] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index becf0e38ef7e..bc934dbe3bf0 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -406,7 +406,7 @@ void omap_uart_resume_idle(int num) struct omap_uart_state *uart; list_for_each_entry(uart, &uart_list, node) { - if (num == uart->num) { + if (num == uart->num && uart->can_sleep) { omap_uart_enable_clocks(uart); /* Check for IO pad wakeup */ -- cgit v1.2.3 From 0d8e2d0dad98a693bad88aea6876ac8b94ad95c6 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 24 Nov 2010 16:49:05 -0700 Subject: OMAP2+: PM/serial: hold console semaphore while OMAP UARTs are disabled The console semaphore must be held while the OMAP UART devices are disabled, lest a console write cause an ARM abort (and a kernel crash) when the underlying console device is inaccessible. These crashes only occur when the console is on one of the OMAP internal serial ports. While this problem has been latent in the PM idle loop for some time, the crash was not triggerable with an unmodified kernel until commit 6f251e9db1093c187addc309b5f2f7fe3efd2995 ("OMAP: UART: omap_device conversions, remove implicit 8520 assumptions"). After this patch, a console write often occurs after the console UART has been disabled in the idle loop, crashing the system. Several users have encountered this bug: http://www.mail-archive.com/linux-omap@vger.kernel.org/msg38396.html http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36602.html The same commit also introduced new code that disabled the UARTs during init, in omap_serial_init_port(). The kernel will also crash in this code when earlyconsole and extra debugging is enabled: http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36411.html The minimal fix for the -rc series is to hold the console semaphore while the OMAP UARTs are disabled. This is a somewhat overbroad fix, since the console may not be located on an OMAP UART, as is the case with the GPMC UART on Zoom3. While it is technically possible to determine which devices the console or earlyconsole is actually running on, it is not a trivial problem to solve, and the code to do so is not really appropriate for the -rc series. The right long-term fix is to ensure that no code outside of the OMAP serial driver can disable an OMAP UART. As I understand it, code to implement this is under development by TI. This patch is a collaboration between Paul Walmsley and Tony Lindgren . Thanks to Ming Lei and Pramod for their feedback on earlier versions of this patch. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren Acked-by: Kevin Hilman Cc: Ming Lei Cc: Pramod Cc: Thomas Petazzoni Cc: Jean Pihet Cc: Govindraj.R --- arch/arm/mach-omap2/pm24xx.c | 7 +++++++ arch/arm/mach-omap2/pm34xx.c | 10 ++++++++++ arch/arm/mach-omap2/serial.c | 5 +++++ 3 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index a40457d81927..c85923e56b85 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,10 @@ static void omap2_enter_full_retention(void) if (omap_irq_pending()) goto no_sleep; + /* Block console output in case it is on one of the OMAP UARTs */ + if (try_acquire_console_sem()) + goto no_sleep; + omap_uart_prepare_idle(0); omap_uart_prepare_idle(1); omap_uart_prepare_idle(2); @@ -131,6 +136,8 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(1); omap_uart_resume_idle(0); + release_console_sem(); + no_sleep: if (omap2_pm_debug) { unsigned long long tmp; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 75c0cd13ad8e..0ec8a04b7473 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -385,6 +386,12 @@ void omap_sram_idle(void) omap3_enable_io_chain(); } + /* Block console output in case it is on one of the OMAP UARTs */ + if (per_next_state < PWRDM_POWER_ON || + core_next_state < PWRDM_POWER_ON) + if (try_acquire_console_sem()) + goto console_still_active; + /* PER */ if (per_next_state < PWRDM_POWER_ON) { omap_uart_prepare_idle(2); @@ -463,6 +470,9 @@ void omap_sram_idle(void) omap_uart_resume_idle(3); } + release_console_sem(); + +console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ if (omap3_has_io_wakeup() && (per_next_state < PWRDM_POWER_ON || diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index bc934dbe3bf0..d17960a1be25 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_SERIAL_OMAP #include @@ -807,6 +808,8 @@ void __init omap_serial_init_port(int port) oh->dev_attr = uart; + acquire_console_sem(); /* in case the earlycon is on the UART */ + /* * Because of early UART probing, UART did not get idled * on init. Now that omap_device is ready, ensure full idle @@ -831,6 +834,8 @@ void __init omap_serial_init_port(int port) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; + release_console_sem(); + if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { device_init_wakeup(&od->pdev.dev, true); -- cgit v1.2.3 From 6fc50eafc49262e376455a480f9d793817fe74e2 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Tue, 16 Nov 2010 18:11:59 +0900 Subject: ARM: S3C2410: Adapt h1940-bluetooth to gpiolib changes Replace in s3c_gpio_cfgpull with s3c_gpio_setpull. Signed-off-by: Vasily Khoruzhick Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c2410/h1940-bluetooth.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c index 8aa2f1902a94..6b86a722a7db 100644 --- a/arch/arm/mach-s3c2410/h1940-bluetooth.c +++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c @@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev) /* Configures BT serial port GPIOs */ s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0); - s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT); - s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0); - s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0); - s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH, -- cgit v1.2.3 From cce58ab380727169ef72b76481441f851e5850b0 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Mon, 15 Nov 2010 11:11:22 +0900 Subject: ARM: S3C24XX: Fix wrong s3c_gpio_cfgpull This patch fixes wrong s3c_gpio_cfgpull with s3c_gpio_setpull. Cc: Ben Dooks Signed-off-by: Kukjin Kim --- arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c | 6 +++--- arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c | 6 +++--- arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c index 9793544a6ace..704175b0573f 100644 --- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c +++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c @@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE); } } diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c index db9e9e477ec1..72457afd6255 100644 --- a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c +++ b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c @@ -31,8 +31,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpd8_9_10(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE); } } diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c index 8ea663a438bb..c3972b645d13 100644 --- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c +++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c @@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi, } else { s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT); s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT); - s3c_gpio_cfgpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE); - s3c_gpio_cfgpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE); + s3c_gpio_setpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE); } } -- cgit v1.2.3 From 8b8c87dee47ae7e41af95d03ca56b3a4633466a6 Mon Sep 17 00:00:00 2001 From: Darius Augulis Date: Tue, 16 Nov 2010 18:08:50 +0900 Subject: ARM: S3C64XX: fix uart clock setup for mini6410/real6410 Don't rewrite clock config in UCON preconfigured by bootloader. No need to set 10th bit in UCON because [11:10] 2'b00 means source clock is PCLK too. If set, console does not work if bootloader has preconfigured [11:10] with 2'b00. If not set, console works with any bootloader config value (2'bxx). More information about clock setup in UCON is available in "S3C6410X RISC Microprocessor User's Manual, Revision 1.20" p. 31-13 (Chapter 31.6.2 UART CONTROL REGISTER). Signed-off-by: Darius Augulis Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c64xx/mach-mini6410.c | 2 +- arch/arm/mach-s3c64xx/mach-real6410.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c index 249c62956471..89f35e02e883 100644 --- a/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -45,7 +45,7 @@ #include