summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Kconfig.debug63
-rw-r--r--lib/Makefile2
-rw-r--r--lib/atomic64_test.c2
-rw-r--r--lib/devres.c2
-rw-r--r--lib/flex_array.c25
-rw-r--r--lib/iommu-helper.c9
-rw-r--r--lib/ioremap.c10
-rw-r--r--lib/list_debug.c6
-rw-r--r--lib/lmb.c541
-rw-r--r--lib/percpu_counter.c27
-rw-r--r--lib/radix-tree.c94
-rw-r--r--lib/random32.c2
-rw-r--r--lib/rbtree.c116
-rw-r--r--lib/rwsem.c150
-rw-r--r--lib/swiotlb.c137
-rw-r--r--lib/vsprintf.c23
17 files changed, 456 insertions, 756 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 170d8ca901d8..5b916bc0fbae 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -181,9 +181,6 @@ config HAS_DMA
config CHECK_SIGNATURE
bool
-config HAVE_LMB
- boolean
-
config CPUMASK_OFFSTACK
bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
help
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 95ab402db9c0..9e06b7f5ecf1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -76,7 +76,6 @@ config UNUSED_SYMBOLS
config DEBUG_FS
bool "Debug Filesystem"
- depends on SYSFS
help
debugfs is a virtual file system that kernel developers use to put
debugging files into. Enable this option to be able to read and
@@ -152,28 +151,33 @@ config DEBUG_SHIRQ
Drivers ought to be able to handle interrupts coming in at those
points; some don't and need to be caught.
-config DETECT_SOFTLOCKUP
- bool "Detect Soft Lockups"
+config LOCKUP_DETECTOR
+ bool "Detect Hard and Soft Lockups"
depends on DEBUG_KERNEL && !S390
- default y
help
- Say Y here to enable the kernel to detect "soft lockups",
- which are bugs that cause the kernel to loop in kernel
+ Say Y here to enable the kernel to act as a watchdog to detect
+ hard and soft lockups.
+
+ Softlockups are bugs that cause the kernel to loop in kernel
mode for more than 60 seconds, without giving other tasks a
- chance to run.
+ chance to run. The current stack trace is displayed upon
+ detection and the system will stay locked up.
- When a soft-lockup is detected, the kernel will print the
- current stack trace (which you should report), but the
- system will stay locked up. This feature has negligible
- overhead.
+ Hardlockups are bugs that cause the CPU to loop in kernel mode
+ for more than 60 seconds, without letting other interrupts have a
+ chance to run. The current stack trace is displayed upon detection
+ and the system will stay locked up.
- (Note that "hard lockups" are separate type of bugs that
- can be detected via the NMI-watchdog, on platforms that
- support it.)
+ The overhead should be minimal. A periodic hrtimer runs to
+ generate interrupts and kick the watchdog task every 10-12 seconds.
+ An NMI is generated every 60 seconds or so to check for hardlockups.
+
+config HARDLOCKUP_DETECTOR
+ def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
- depends on DETECT_SOFTLOCKUP
+ depends on LOCKUP_DETECTOR
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
@@ -190,7 +194,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
int
- depends on DETECT_SOFTLOCKUP
+ depends on LOCKUP_DETECTOR
range 0 1
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -307,6 +311,12 @@ config DEBUG_OBJECTS_WORK
work queue routines to track the life time of work objects and
validate the work operations.
+config DEBUG_OBJECTS_RCU_HEAD
+ bool "Debug RCU callbacks objects"
+ depends on DEBUG_OBJECTS && PREEMPT
+ help
+ Enable this to turn on debugging of RCU list heads (call_rcu() usage).
+
config DEBUG_OBJECTS_ENABLE_DEFAULT
int "debug_objects bootup default value (0-1)"
range 0 1
@@ -535,7 +545,7 @@ config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390
+ select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
select KALLSYMS
select KALLSYMS_ALL
@@ -635,6 +645,19 @@ config DEBUG_INFO
If unsure, say N.
+config DEBUG_INFO_REDUCED
+ bool "Reduce debugging information"
+ depends on DEBUG_INFO
+ help
+ If you say Y here gcc is instructed to generate less debugging
+ information for structure types. This means that tools that
+ need full debugging information (like kgdb or systemtap) won't
+ be happy. But if you merely need debugging information to
+ resolve line numbers there is no loss. Advantage is that
+ build directory object sizes shrink dramatically over a full
+ DEBUG_INFO build and compile times are reduced too.
+ Only works with newer gcc versions.
+
config DEBUG_VM
bool "Debug VM"
depends on DEBUG_KERNEL
@@ -944,7 +967,7 @@ config FAIL_MAKE_REQUEST
Provide fault-injection capability for disk IO.
config FAIL_IO_TIMEOUT
- bool "Faul-injection capability for faking disk interrupts"
+ bool "Fault-injection capability for faking disk interrupts"
depends on FAULT_INJECTION && BLOCK
help
Provide fault-injection capability on end IO handling. This
@@ -965,13 +988,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
depends on !X86_64
select STACKTRACE
- select FRAME_POINTER if !PPC && !S390
+ select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
help
Provide stacktrace filter for fault-injection capabilities
config LATENCYTOP
bool "Latency measuring infrastructure"
- select FRAME_POINTER if !MIPS && !PPC && !S390
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
diff --git a/lib/Makefile b/lib/Makefile
index 3f1062cbbff4..0bfabba1bb32 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -89,8 +89,6 @@ obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 250ed11d3ed2..44524cc8c32a 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -114,7 +114,7 @@ static __init int test_atomic64(void)
BUG_ON(v.counter != r);
#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
- defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H)
+ defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
INIT(onestwos);
BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
r -= one;
diff --git a/lib/devres.c b/lib/devres.c
index 49368608f988..6efddf53b90c 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -328,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
* @pdev: PCI device to map IO resources for
* @mask: Mask of BARs to unmap and release
*
- * Unamp and release regions specified by @mask.
+ * Unmap and release regions specified by @mask.
*/
void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
{
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 41b1804fa728..77a6fea7481e 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -171,6 +171,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
* Note that this *copies* the contents of @src into
* the array. If you are trying to store an array of
* pointers, make sure to pass in &ptr instead of ptr.
+ * You may instead wish to use the flex_array_put_ptr()
+ * helper function.
*
* Locking must be provided by the caller.
*/
@@ -265,7 +267,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
*
* Returns a pointer to the data at index @element_nr. Note
* that this is a copy of the data that was passed in. If you
- * are using this to store pointers, you'll get back &ptr.
+ * are using this to store pointers, you'll get back &ptr. You
+ * may instead wish to use the flex_array_get_ptr helper.
*
* Locking must be provided by the caller.
*/
@@ -286,6 +289,26 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
return &part->elements[index_inside_part(fa, element_nr)];
}
+/**
+ * flex_array_get_ptr - pull a ptr back out of the array
+ * @fa: the flex array from which to extract data
+ * @element_nr: index of the element to fetch from the array
+ *
+ * Returns the pointer placed in the flex array at element_nr using
+ * flex_array_put_ptr(). This function should not be called if the
+ * element in question was not set using the _put_ptr() helper.
+ */
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
+{
+ void **tmp;
+
+ tmp = flex_array_get(fa, element_nr);
+ if (!tmp)
+ return NULL;
+
+ return *tmp;
+}
+
static int part_is_free(struct flex_array_part *part)
{
int i;
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c0251f4ad08b..da053313ee5c 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -38,12 +38,3 @@ again:
return -1;
}
EXPORT_SYMBOL(iommu_area_alloc);
-
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
- unsigned long io_page_size)
-{
- unsigned long size = (addr & (io_page_size - 1)) + len;
-
- return DIV_ROUND_UP(size, io_page_size);
-}
-EXPORT_SYMBOL(iommu_num_pages);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a2..5730ecd3eb66 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
#include <asm/pgtable.h>
static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
pte_t *pte;
- unsigned long pfn;
+ u64 pfn;
pfn = phys_addr >> PAGE_SHIFT;
pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
}
static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, unsigned long phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
pmd_t *pmd;
unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
}
static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, unsigned long phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
pud_t *pud;
unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
}
int ioremap_page_range(unsigned long addr,
- unsigned long end, unsigned long phys_addr, pgprot_t prot)
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
pgd_t *pgd;
unsigned long start;
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1f..344c710d16ca 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
*/
void list_del(struct list_head *entry)
{
+ WARN(entry->next == LIST_POISON1,
+ "list_del corruption, next is LIST_POISON1 (%p)\n",
+ LIST_POISON1);
+ WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
+ "list_del corruption, prev is LIST_POISON2 (%p)\n",
+ LIST_POISON2);
WARN(entry->prev->next != entry,
"list_del corruption. prev->next should be %p, "
"but was %p\n", entry, entry->prev->next);
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index b1fc52606524..000000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Procedures for maintaining information about logical memory blocks.
- *
- * Peter Bergner, IBM Corp. June 2001.
- * Copyright (C) 2001 Peter Bergner.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/lmb.h>
-
-#define LMB_ALLOC_ANYWHERE 0
-
-struct lmb lmb;
-
-static int lmb_debug;
-
-static int __init early_lmb(char *p)
-{
- if (p && strstr(p, "debug"))
- lmb_debug = 1;
- return 0;
-}
-early_param("lmb", early_lmb);
-
-static void lmb_dump(struct lmb_region *region, char *name)
-{
- unsigned long long base, size;
- int i;
-
- pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
-
- for (i = 0; i < region->cnt; i++) {
- base = region->region[i].base;
- size = region->region[i].size;
-
- pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
- name, i, base, base + size - 1, size);
- }
-}
-
-void lmb_dump_all(void)
-{
- if (!lmb_debug)
- return;
-
- pr_info("LMB configuration:\n");
- pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size);
- pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
-
- lmb_dump(&lmb.memory, "memory");
- lmb_dump(&lmb.reserved, "reserved");
-}
-
-static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
- u64 size2)
-{
- return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-
-static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
-{
- if (base2 == base1 + size1)
- return 1;
- else if (base1 == base2 + size2)
- return -1;
-
- return 0;
-}
-
-static long lmb_regions_adjacent(struct lmb_region *rgn,
- unsigned long r1, unsigned long r2)
-{
- u64 base1 = rgn->region[r1].base;
- u64 size1 = rgn->region[r1].size;
- u64 base2 = rgn->region[r2].base;
- u64 size2 = rgn->region[r2].size;
-
- return lmb_addrs_adjacent(base1, size1, base2, size2);
-}
-
-static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
-{
- unsigned long i;
-
- for (i = r; i < rgn->cnt - 1; i++) {
- rgn->region[i].base = rgn->region[i + 1].base;
- rgn->region[i].size = rgn->region[i + 1].size;
- }
- rgn->cnt--;
-}
-
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void lmb_coalesce_regions(struct lmb_region *rgn,
- unsigned long r1, unsigned long r2)
-{
- rgn->region[r1].size += rgn->region[r2].size;
- lmb_remove_region(rgn, r2);
-}
-
-void __init lmb_init(void)
-{
- /* Create a dummy zero size LMB which will get coalesced away later.
- * This simplifies the lmb_add() code below...
- */
- lmb.memory.region[0].base = 0;
- lmb.memory.region[0].size = 0;
- lmb.memory.cnt = 1;
-
- /* Ditto. */
- lmb.reserved.region[0].base = 0;
- lmb.reserved.region[0].size = 0;
- lmb.reserved.cnt = 1;
-}
-
-void __init lmb_analyze(void)
-{
- int i;
-
- lmb.memory.size = 0;
-
- for (i = 0; i < lmb.memory.cnt; i++)
- lmb.memory.size += lmb.memory.region[i].size;
-}
-
-static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
-{
- unsigned long coalesced = 0;
- long adjacent, i;
-
- if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
- rgn->region[0].base = base;
- rgn->region[0].size = size;
- return 0;
- }
-
- /* First try and coalesce this LMB with another. */
- for (i = 0; i < rgn->cnt; i++) {
- u64 rgnbase = rgn->region[i].base;
- u64 rgnsize = rgn->region[i].size;
-
- if ((rgnbase == base) && (rgnsize == size))
- /* Already have this region, so we're done */
- return 0;
-
- adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
- if (adjacent > 0) {
- rgn->region[i].base -= size;
- rgn->region[i].size += size;
- coalesced++;
- break;
- } else if (adjacent < 0) {
- rgn->region[i].size += size;
- coalesced++;
- break;
- }
- }
-
- if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
- lmb_coalesce_regions(rgn, i, i+1);
- coalesced++;
- }
-
- if (coalesced)
- return coalesced;
- if (rgn->cnt >= MAX_LMB_REGIONS)
- return -1;
-
- /* Couldn't coalesce the LMB, so add it to the sorted table. */
- for (i = rgn->cnt - 1; i >= 0; i--) {
- if (base < rgn->region[i].base) {
- rgn->region[i+1].base = rgn->region[i].base;
- rgn->region[i+1].size = rgn->region[i].size;
- } else {
- rgn->region[i+1].base = base;
- rgn->region[i+1].size = size;
- break;
- }
- }
-
- if (base < rgn->region[0].base) {
- rgn->region[0].base = base;
- rgn->region[0].size = size;
- }
- rgn->cnt++;
-
- return 0;
-}
-
-long lmb_add(u64 base, u64 size)
-{
- struct lmb_region *_rgn = &lmb.memory;
-
- /* On pSeries LPAR systems, the first LMB is our RMO region. */
- if (base == 0)
- lmb.rmo_size = size;
-
- return lmb_add_region(_rgn, base, size);
-
-}
-
-static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size)
-{
- u64 rgnbegin, rgnend;
- u64 end = base + size;
- int i;
-
- rgnbegin = rgnend = 0; /* supress gcc warnings */
-
- /* Find the region where (base, size) belongs to */
- for (i=0; i < rgn->cnt; i++) {
- rgnbegin = rgn->region[i].base;
- rgnend = rgnbegin + rgn->region[i].size;
-
- if ((rgnbegin <= base) && (end <= rgnend))
- break;
- }
-
- /* Didn't find the region */
- if (i == rgn->cnt)
- return -1;
-
- /* Check to see if we are removing entire region */
- if ((rgnbegin == base) && (rgnend == end)) {
- lmb_remove_region(rgn, i);
- return 0;
- }
-
- /* Check to see if region is matching at the front */
- if (rgnbegin == base) {
- rgn->region[i].base = end;
- rgn->region[i].size -= size;
- return 0;
- }
-
- /* Check to see if the region is matching at the end */
- if (rgnend == end) {
- rgn->region[i].size -= size;
- return 0;
- }
-
- /*
- * We need to split the entry - adjust the current one to the
- * beginging of the hole and add the region after hole.
- */
- rgn->region[i].size = base - rgn->region[i].base;
- return lmb_add_region(rgn, end, rgnend - end);
-}
-
-long lmb_remove(u64 base, u64 size)
-{
- return __lmb_remove(&lmb.memory, base, size);
-}
-
-long __init lmb_free(u64 base, u64 size)
-{
- return __lmb_remove(&lmb.reserved, base, size);
-}
-
-long __init lmb_reserve(u64 base, u64 size)
-{
- struct lmb_region *_rgn = &lmb.reserved;
-
- BUG_ON(0 == size);
-
- return lmb_add_region(_rgn, base, size);
-}
-
-long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
-{
- unsigned long i;
-
- for (i = 0; i < rgn->cnt; i++) {
- u64 rgnbase = rgn->region[i].base;
- u64 rgnsize = rgn->region[i].size;
- if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
- break;
- }
-
- return (i < rgn->cnt) ? i : -1;
-}
-
-static u64 lmb_align_down(u64 addr, u64 size)
-{
- return addr & ~(size - 1);
-}
-
-static u64 lmb_align_up(u64 addr, u64 size)
-{
- return (addr + (size - 1)) & ~(size - 1);
-}
-
-static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
- u64 size, u64 align)
-{
- u64 base, res_base;
- long j;
-
- base = lmb_align_down((end - size), align);
- while (start <= base) {
- j = lmb_overlaps_region(&lmb.reserved, base, size);
- if (j < 0) {
- /* this area isn't reserved, take it */
- if (lmb_add_region(&lmb.reserved, base, size) < 0)
- base = ~(u64)0;
- return base;
- }
- res_base = lmb.reserved.region[j].base;
- if (res_base < size)
- break;
- base = lmb_align_down(res_base - size, align);
- }
-
- return ~(u64)0;
-}
-
-static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
- u64 (*nid_range)(u64, u64, int *),
- u64 size, u64 align, int nid)
-{
- u64 start, end;
-
- start = mp->base;
- end = start + mp->size;
-
- start = lmb_align_up(start, align);
- while (start < end) {
- u64 this_end;
- int this_nid;
-
- this_end = nid_range(start, end, &this_nid);
- if (this_nid == nid) {
- u64 ret = lmb_alloc_nid_unreserved(start, this_end,
- size, align);
- if (ret != ~(u64)0)
- return ret;
- }
- start = this_end;
- }
-
- return ~(u64)0;
-}
-
-u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
- u64 (*nid_range)(u64 start, u64 end, int *nid))
-{
- struct lmb_region *mem = &lmb.memory;
- int i;
-
- BUG_ON(0 == size);
-
- size = lmb_align_up(size, align);
-
- for (i = 0; i < mem->cnt; i++) {
- u64 ret = lmb_alloc_nid_region(&mem->region[i],
- nid_range,
- size, align, nid);
- if (ret != ~(u64)0)
- return ret;
- }
-
- return lmb_alloc(size, align);
-}
-
-u64 __init lmb_alloc(u64 size, u64 align)
-{
- return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
-}
-
-u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
- u64 alloc;
-
- alloc = __lmb_alloc_base(size, align, max_addr);
-
- if (alloc == 0)
- panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
- (unsigned long long) size, (unsigned long long) max_addr);
-
- return alloc;
-}
-
-u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
- long i, j;
- u64 base = 0;
- u64 res_base;
-
- BUG_ON(0 == size);
-
- size = lmb_align_up(size, align);
-
- /* On some platforms, make sure we allocate lowmem */
- /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
- if (max_addr == LMB_ALLOC_ANYWHERE)
- max_addr = LMB_REAL_LIMIT;
-
- for (i = lmb.memory.cnt - 1; i >= 0; i--) {
- u64 lmbbase = lmb.memory.region[i].base;
- u64 lmbsize = lmb.memory.region[i].size;
-
- if (lmbsize < size)
- continue;
- if (max_addr == LMB_ALLOC_ANYWHERE)
- base = lmb_align_down(lmbbase + lmbsize - size, align);
- else if (lmbbase < max_addr) {
- base = min(lmbbase + lmbsize, max_addr);
- base = lmb_align_down(base - size, align);
- } else
- continue;
-
- while (base && lmbbase <= base) {
- j = lmb_overlaps_region(&lmb.reserved, base, size);
- if (j < 0) {
- /* this area isn't reserved, take it */
- if (lmb_add_region(&lmb.reserved, base, size) < 0)
- return 0;
- return base;
- }
- res_base = lmb.reserved.region[j].base;
- if (res_base < size)
- break;
- base = lmb_align_down(res_base - size, align);
- }
- }
- return 0;
-}
-
-/* You must call lmb_analyze() before this. */
-u64 __init lmb_phys_mem_size(void)
-{
- return lmb.memory.size;
-}
-
-u64 lmb_end_of_DRAM(void)
-{
- int idx = lmb.memory.cnt - 1;
-
- return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
-}
-
-/* You must call lmb_analyze() after this. */
-void __init lmb_enforce_memory_limit(u64 memory_limit)
-{
- unsigned long i;
- u64 limit;
- struct lmb_property *p;
-
- if (!memory_limit)
- return;
-
- /* Truncate the lmb regions to satisfy the memory limit. */
- limit = memory_limit;
- for (i = 0; i < lmb.memory.cnt; i++) {
- if (limit > lmb.memory.region[i].size) {
- limit -= lmb.memory.region[i].size;
- continue;
- }
-
- lmb.memory.region[i].size = limit;
- lmb.memory.cnt = i + 1;
- break;
- }
-
- if (lmb.memory.region[0].size < lmb.rmo_size)
- lmb.rmo_size = lmb.memory.region[0].size;
-
- memory_limit = lmb_end_of_DRAM();
-
- /* And truncate any reserves above the limit also. */
- for (i = 0; i < lmb.reserved.cnt; i++) {
- p = &lmb.reserved.region[i];
-
- if (p->base > memory_limit)
- p->size = 0;
- else if ((p->base + p->size) > memory_limit)
- p->size = memory_limit - p->base;
-
- if (p->size == 0) {
- lmb_remove_region(&lmb.reserved, i);
- i--;
- }
- }
-}
-
-int __init lmb_is_reserved(u64 addr)
-{
- int i;
-
- for (i = 0; i < lmb.reserved.cnt; i++) {
- u64 upper = lmb.reserved.region[i].base +
- lmb.reserved.region[i].size - 1;
- if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
- return 1;
- }
- return 0;
-}
-
-int lmb_is_region_reserved(u64 base, u64 size)
-{
- return lmb_overlaps_region(&lmb.reserved, base, size);
-}
-
-/*
- * Given a <base, len>, find which memory regions belong to this range.
- * Adjust the request and return a contiguous chunk.
- */
-int lmb_find(struct lmb_property *res)
-{
- int i;
- u64 rstart, rend;
-
- rstart = res->base;
- rend = rstart + res->size - 1;
-
- for (i = 0; i < lmb.memory.cnt; i++) {
- u64 start = lmb.memory.region[i].base;
- u64 end = start + lmb.memory.region[i].size - 1;
-
- if (start > rend)
- return -1;
-
- if ((end >= rstart) && (start < rend)) {
- /* adjust the request */
- if (rstart < start)
- rstart = start;
- if (rend > end)
- rend = end;
- res->base = rstart;
- res->size = rend - rstart + 1;
- return 0;
- }
- }
- return -1;
-}
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d734447..ec9048e74f44 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
+/*
+ * Compare counter against given value.
+ * Return 1 if greater, 0 if equal and -1 if less
+ */
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+ s64 count;
+
+ count = percpu_counter_read(fbc);
+ /* Check to see if rough count will be sufficient for comparison */
+ if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+ if (count > rhs)
+ return 1;
+ else
+ return -1;
+ }
+ /* Need to use precise count */
+ count = percpu_counter_sum(fbc);
+ if (count > rhs)
+ return 1;
+ else if (count < rhs)
+ return -1;
+ else
+ return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
+
static int __init percpu_counter_startup(void)
{
compute_batch_value();
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 05da38bcc298..e907858498a6 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -609,6 +609,100 @@ int radix_tree_tag_get(struct radix_tree_root *root,
EXPORT_SYMBOL(radix_tree_tag_get);
/**
+ * radix_tree_range_tag_if_tagged - for each item in given range set given
+ * tag if item has another tag set
+ * @root: radix tree root
+ * @first_indexp: pointer to a starting index of a range to scan
+ * @last_index: last index of a range to scan
+ * @nr_to_tag: maximum number items to tag
+ * @iftag: tag index to test
+ * @settag: tag index to set if tested tag is set
+ *
+ * This function scans range of radix tree from first_index to last_index
+ * (inclusive). For each item in the range if iftag is set, the function sets
+ * also settag. The function stops either after tagging nr_to_tag items or
+ * after reaching last_index.
+ *
+ * The function returns number of leaves where the tag was set and sets
+ * *first_indexp to the first unscanned index.
+ */
+unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
+ unsigned long *first_indexp, unsigned long last_index,
+ unsigned long nr_to_tag,
+ unsigned int iftag, unsigned int settag)
+{
+ unsigned int height = root->height, shift;
+ unsigned long tagged = 0, index = *first_indexp;
+ struct radix_tree_node *open_slots[height], *slot;
+
+ last_index = min(last_index, radix_tree_maxindex(height));
+ if (index > last_index)
+ return 0;
+ if (!nr_to_tag)
+ return 0;
+ if (!root_tag_get(root, iftag)) {
+ *first_indexp = last_index + 1;
+ return 0;
+ }
+ if (height == 0) {
+ *first_indexp = last_index + 1;
+ root_tag_set(root, settag);
+ return 1;
+ }
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = radix_tree_indirect_to_ptr(root->rnode);
+
+ for (;;) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!slot->slots[offset])
+ goto next;
+ if (!tag_get(slot, iftag, offset))
+ goto next;
+ tag_set(slot, settag, offset);
+ if (height == 1) {
+ tagged++;
+ goto next;
+ }
+ /* Go down one level */
+ height--;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ open_slots[height] = slot;
+ slot = slot->slots[offset];
+ continue;
+next:
+ /* Go to next item at level determined by 'shift' */
+ index = ((index >> shift) + 1) << shift;
+ if (index > last_index)
+ break;
+ if (tagged >= nr_to_tag)
+ break;
+ while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
+ /*
+ * We've fully scanned this node. Go up. Because
+ * last_index is guaranteed to be in the tree, what
+ * we do below cannot wander astray.
+ */
+ slot = open_slots[height];
+ height++;
+ shift += RADIX_TREE_MAP_SHIFT;
+ }
+ }
+ /*
+ * The iftag must have been set somewhere because otherwise
+ * we would return immediated at the beginning of the function
+ */
+ root_tag_set(root, settag);
+ *first_indexp = index;
+
+ return tagged;
+}
+EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
+
+
+/**
* radix_tree_next_hole - find the next hole (not-present entry)
* @root: tree root
* @index: index key
diff --git a/lib/random32.c b/lib/random32.c
index 870dc3fc0f0f..fc3545a32771 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -127,7 +127,7 @@ core_initcall(random32_init);
/*
* Generate better values after random number generator
- * is fully initalized.
+ * is fully initialized.
*/
static int __init random32_reseed(void)
{
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 15e10b1afdd2..4693f79195d3 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
else
root->rb_node = right;
rb_set_parent(node, right);
-
- if (root->augment_cb) {
- root->augment_cb(node);
- root->augment_cb(right);
- }
}
static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
@@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
else
root->rb_node = left;
rb_set_parent(node, left);
-
- if (root->augment_cb) {
- root->augment_cb(node);
- root->augment_cb(left);
- }
}
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
struct rb_node *parent, *gparent;
- if (root->augment_cb)
- root->augment_cb(node);
-
while ((parent = rb_parent(node)) && rb_is_red(parent))
{
gparent = rb_parent(parent);
@@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
else
{
struct rb_node *old = node, *left;
- int old_parent_cb = 0;
- int successor_parent_cb = 0;
node = node->rb_right;
while ((left = node->rb_left) != NULL)
node = left;
if (rb_parent(old)) {
- old_parent_cb = 1;
if (rb_parent(old)->rb_left == old)
rb_parent(old)->rb_left = node;
else
@@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
if (parent == old) {
parent = node;
} else {
- successor_parent_cb = 1;
if (child)
rb_set_parent(child, parent);
-
parent->rb_left = child;
node->rb_right = old->rb_right;
@@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
node->rb_left = old->rb_left;
rb_set_parent(old->rb_left, node);
- if (root->augment_cb) {
- /*
- * Here, three different nodes can have new children.
- * The parent of the successor node that was selected
- * to replace the node to be erased.
- * The node that is getting erased and is now replaced
- * by its successor.
- * The parent of the node getting erased-replaced.
- */
- if (successor_parent_cb)
- root->augment_cb(parent);
-
- root->augment_cb(node);
-
- if (old_parent_cb)
- root->augment_cb(rb_parent(old));
- }
-
goto color;
}
@@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
if (child)
rb_set_parent(child, parent);
-
- if (parent) {
+ if (parent)
+ {
if (parent->rb_left == node)
parent->rb_left = child;
else
parent->rb_right = child;
-
- if (root->augment_cb)
- root->augment_cb(parent);
-
- } else {
- root->rb_node = child;
}
+ else
+ root->rb_node = child;
color:
if (color == RB_BLACK)
@@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
}
EXPORT_SYMBOL(rb_erase);
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+ struct rb_node *parent;
+
+up:
+ func(node, data);
+ parent = rb_parent(node);
+ if (!parent)
+ return;
+
+ if (node == parent->rb_left && parent->rb_right)
+ func(parent->rb_right, data);
+ else if (parent->rb_left)
+ func(parent->rb_left, data);
+
+ node = parent;
+ goto up;
+}
+
+/*
+ * after inserting @node into the tree, update the tree to account for
+ * both the new entry and any damage done by rebalance
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+ if (node->rb_left)
+ node = node->rb_left;
+ else if (node->rb_right)
+ node = node->rb_right;
+
+ rb_augment_path(node, func, data);
+}
+
+/*
+ * before removing the node, find the deepest node on the rebalance path
+ * that will still be there after @node gets removed
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+ struct rb_node *deepest;
+
+ if (!node->rb_right && !node->rb_left)
+ deepest = rb_parent(node);
+ else if (!node->rb_right)
+ deepest = node->rb_left;
+ else if (!node->rb_left)
+ deepest = node->rb_right;
+ else {
+ deepest = rb_next(node);
+ if (deepest->rb_right)
+ deepest = deepest->rb_right;
+ else if (rb_parent(deepest) != node)
+ deepest = rb_parent(deepest);
+ }
+
+ return deepest;
+}
+
+/*
+ * after removal, update the tree to account for the removed entry
+ * and any rebalance damage.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+ if (node)
+ rb_augment_path(node, func, data);
+}
+
/*
* This function returns the first node (in sort order) of the tree.
*/
diff --git a/lib/rwsem.c b/lib/rwsem.c
index ceba8e28807a..f236d7cd5cf3 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};
+/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
+ * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
+ * since the rwsem value was observed.
+ */
+#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
+#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
+#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
+
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then:
* - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
* - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- * - there must be someone on the queue
+ * - there must be someone on the queue
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if downgrading is false
*/
-static inline struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
+static struct rw_semaphore *
+__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
struct list_head *next;
- signed long oldcount, woken, loop;
-
- if (downgrading)
- goto dont_wake_writers;
-
- /* if we came through an up_xxxx() call, we only only wake someone up
- * if we can transition the active part of the count from 0 -> 1
- */
- try_again:
- oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
- - RWSEM_ACTIVE_BIAS;
- if (oldcount & RWSEM_ACTIVE_MASK)
- goto undo;
+ signed long oldcount, woken, loop, adjustment;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-
- /* try to grant a single write lock if there's a writer at the front
- * of the queue - note we leave the 'active part' of the count
- * incremented by 1 and the waiting part incremented by 0x00010000
- */
if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
goto readers_only;
+ if (wake_type == RWSEM_WAKE_READ_OWNED)
+ /* Another active reader was observed, so wakeup is not
+ * likely to succeed. Save the atomic op.
+ */
+ goto out;
+
+ /* There's a writer at the front of the queue - try to grant it the
+ * write lock. However, we only wake this writer if we can transition
+ * the active part of the count from 0 -> 1
+ */
+ adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+ if (waiter->list.next == &sem->wait_list)
+ adjustment -= RWSEM_WAITING_BIAS;
+
+ try_again_write:
+ oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+ if (oldcount & RWSEM_ACTIVE_MASK)
+ /* Someone grabbed the sem already */
+ goto undo_write;
+
/* We must be careful not to touch 'waiter' after we set ->task = NULL.
* It is an allocated on the waiter's stack and may become invalid at
* any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
put_task_struct(tsk);
goto out;
- /* don't want to wake any writers */
- dont_wake_writers:
- waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
- if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
+ readers_only:
+ /* If we come here from up_xxxx(), another thread might have reached
+ * rwsem_down_failed_common() before we acquired the spinlock and
+ * woken up a waiter, making it now active. We prefer to check for
+ * this first in order to not spend too much time with the spinlock
+ * held if we're not going to be able to wake up readers in the end.
+ *
+ * Note that we do not need to update the rwsem count: any writer
+ * trying to acquire rwsem will run rwsem_down_write_failed() due
+ * to the waiting threads and block trying to acquire the spinlock.
+ *
+ * We use a dummy atomic update in order to acquire the cache line
+ * exclusively since we expect to succeed and run the final rwsem
+ * count adjustment pretty soon.
+ */
+ if (wake_type == RWSEM_WAKE_ANY &&
+ rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
+ /* Someone grabbed the sem for write already */
goto out;
- /* grant an infinite number of read locks to the readers at the front
- * of the queue
- * - note we increment the 'active part' of the count by the number of
- * readers before waking any processes up
+ /* Grant an infinite number of read locks to the readers at the front
+ * of the queue. Note we increment the 'active part' of the count by
+ * the number of readers before waking any processes up.
*/
- readers_only:
woken = 0;
do {
woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
} while (waiter->flags & RWSEM_WAITING_FOR_READ);
- loop = woken;
- woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
- if (!downgrading)
- /* we'd already done one increment earlier */
- woken -= RWSEM_ACTIVE_BIAS;
+ adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
+ if (waiter->flags & RWSEM_WAITING_FOR_READ)
+ /* hit end of list above */
+ adjustment -= RWSEM_WAITING_BIAS;
- rwsem_atomic_add(woken, sem);
+ rwsem_atomic_add(adjustment, sem);
next = sem->wait_list.next;
- for (; loop > 0; loop--) {
+ for (loop = woken; loop > 0; loop--) {
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
@@ -138,10 +160,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
/* undo the change to the active count, but check for a transition
* 1->0 */
- undo:
- if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
+ undo_write:
+ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
goto out;
- goto try_again;
+ goto try_again_write;
}
/*
@@ -149,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
*/
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
- struct rwsem_waiter *waiter, signed long adjustment)
+ unsigned int flags, signed long adjustment)
{
+ struct rwsem_waiter waiter;
struct task_struct *tsk = current;
signed long count;
@@ -158,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/* set up my own style of waitqueue */
spin_lock_irq(&sem->wait_lock);
- waiter->task = tsk;
+ waiter.task = tsk;
+ waiter.flags = flags;
get_task_struct(tsk);
- list_add_tail(&waiter->list, &sem->wait_list);
+ if (list_empty(&sem->wait_list))
+ adjustment += RWSEM_WAITING_BIAS;
+ list_add_tail(&waiter.list, &sem->wait_list);
- /* we're now waiting on the lock, but no longer actively read-locking */
+ /* we're now waiting on the lock, but no longer actively locking */
count = rwsem_atomic_update(adjustment, sem);
- /* if there are no active locks, wake the front queued process(es) up */
- if (!(count & RWSEM_ACTIVE_MASK))
- sem = __rwsem_do_wake(sem, 0);
+ /* If there are no active locks, wake the front queued process(es) up.
+ *
+ * Alternatively, if we're called from a failed down_write(), there
+ * were already threads queued before us and there are no active
+ * writers, the lock must be read owned; so we try to wake any read
+ * locks that were queued ahead of us. */
+ if (count == RWSEM_WAITING_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
+ else if (count > RWSEM_WAITING_BIAS &&
+ adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
spin_unlock_irq(&sem->wait_lock);
/* wait to be given the lock */
for (;;) {
- if (!waiter->task)
+ if (!waiter.task)
break;
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -191,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
- struct rwsem_waiter waiter;
-
- waiter.flags = RWSEM_WAITING_FOR_READ;
- rwsem_down_failed_common(sem, &waiter,
- RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
- return sem;
+ return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
+ -RWSEM_ACTIVE_READ_BIAS);
}
/*
@@ -205,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
- struct rwsem_waiter waiter;
-
- waiter.flags = RWSEM_WAITING_FOR_WRITE;
- rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
-
- return sem;
+ return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
+ -RWSEM_ACTIVE_WRITE_BIAS);
}
/*
@@ -225,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 0);
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -245,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 1);
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
spin_unlock_irqrestore(&sem->wait_lock, flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index a009055140ec..34e3082632d8 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -50,19 +50,11 @@
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
- SYNC_FOR_CPU = 0,
- SYNC_FOR_DEVICE = 1,
-};
-
int swiotlb_force;
/*
- * Used to do a quick range check in unmap_single and
- * sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
static char *io_tlb_start, *io_tlb_end;
@@ -140,28 +132,14 @@ void swiotlb_print_info(void)
(unsigned long long)pend);
}
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
unsigned long i, bytes;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
-
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ bytes = nslabs << IO_TLB_SHIFT;
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = alloc_bootmem_low_pages(bytes);
- if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
+ io_tlb_nslabs = nslabs;
+ io_tlb_start = tlb;
io_tlb_end = io_tlb_start + bytes;
/*
@@ -185,6 +163,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
swiotlb_print_info();
}
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+ unsigned long bytes;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_low_pages(bytes);
+ if (!io_tlb_start)
+ panic("Cannot allocate SWIOTLB buffer");
+
+ swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+}
+
void __init
swiotlb_init(int verbose)
{
@@ -323,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
/*
* Bounce: copy the swiotlb buffer back to the original dma location
*/
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(phys);
@@ -360,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
memcpy(phys_to_virt(phys), dma_addr, size);
}
}
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+ phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
char *dma_addr;
unsigned int nslots, stride, index, wrap;
int i;
- unsigned long start_dma_addr;
unsigned long mask;
unsigned long offset_slots;
unsigned long max_slots;
mask = dma_get_seg_boundary(hwdev);
- start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
- offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ tbl_dma_addr &= mask;
+
+ offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
/*
* Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -466,12 +469,27 @@ found:
return dma_addr;
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
+{
+ dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+
+ return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
/*
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
-static void
-do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+void
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -509,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
}
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
- int dir, int target)
+void
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir,
+ enum dma_sync_target target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
@@ -536,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
BUG();
}
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -559,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
}
if (!ret) {
/*
- * We are either out of memory or the device can't DMA
- * to GFP_DMA memory; fall back on map_single(), which
+ * We are either out of memory or the device can't DMA to
+ * GFP_DMA memory; fall back on map_single(), which
* will grab memory from the lowest available address range.
*/
ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -578,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
(unsigned long long)dev_addr);
/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
- do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+ swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
return NULL;
}
*dma_handle = dev_addr;
@@ -596,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
if (!is_swiotlb_buffer(paddr))
free_pages((unsigned long)vaddr, get_order(size));
else
- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
- do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+ /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
+ swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);
static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+ int do_panic)
{
/*
* Ran out of IOMMU space for this operation. This is very bad.
@@ -680,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
* whatever the device wrote there.
*/
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
return;
}
@@ -723,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
*/
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, int target)
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(paddr)) {
- sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
+ swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+ target);
return;
}
@@ -809,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -836,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -851,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
*/
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, int target)
+ int nelems, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
struct scatterlist *sg;
int i;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b8a2f549ab0e..7af9d841c43b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -146,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
{
char *tail;
unsigned long val;
- size_t len;
*res = 0;
- len = strlen(cp);
- if (len == 0)
+ if (!*cp)
return -EINVAL;
val = simple_strtoul(cp, &tail, base);
if (tail == cp)
return -EINVAL;
- if ((*tail == '\0') ||
- ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+ if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
*res = val;
return 0;
}
@@ -220,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
{
char *tail;
unsigned long long val;
- size_t len;
*res = 0;
- len = strlen(cp);
- if (len == 0)
+ if (!*cp)
return -EINVAL;
val = simple_strtoull(cp, &tail, base);
if (tail == cp)
return -EINVAL;
- if ((*tail == '\0') ||
- ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+ if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
*res = val;
return 0;
}
@@ -980,6 +974,11 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
* [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
* little endian output byte order is:
* [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
+ * - 'V' For a struct va_format which contains a format string * and va_list *,
+ * call vsnprintf(->format, *->va_list).
+ * Implements a "recursive vsnprintf".
+ * Do not use this feature without some mechanism to verify the
+ * correctness of the format string and va_list arguments.
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1025,6 +1024,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
break;
case 'U':
return uuid_string(buf, end, ptr, spec, fmt);
+ case 'V':
+ return buf + vsnprintf(buf, end - buf,
+ ((struct va_format *)ptr)->fmt,
+ *(((struct va_format *)ptr)->va));
}
spec.flags |= SMALL;
if (spec.field_width == -1) {