From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as far as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around the addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its inverse - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tested on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. 
Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/ia64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y -- cgit v1.2.3 From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away .discard section but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. 
Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- arch/ia64/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86b9ac2..13d958975874 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) } -- cgit v1.2.3 From 204fba4aa303ea4a7bb726a539bf4a5b9e3203d0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:45 +0900 Subject: percpu: cleanup percpu array definitions Currently, the following three different ways to define percpu arrays are in use. 1. DEFINE_PER_CPU(elem_type[array_len], array_name); 2. DEFINE_PER_CPU(elem_type, array_name[array_len]); 3. DEFINE_PER_CPU(elem_type, array_name)[array_len]; Unify to #1 which correctly separates the roles of the two parameters and thus allows more flexibility in the way percpu variables are defined. [ Impact: cleanup ] Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Cc: Ingo Molnar Cc: Tony Luck Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Jeremy Fitzhardinge Cc: linux-mm@kvack.org Cc: Christoph Lameter Cc: David S. 
Miller --- arch/ia64/kernel/smp.c | 2 +- arch/ia64/sn/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index f0c521b0ba4c..94cf78ba28fa 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -58,7 +58,7 @@ static struct local_tlb_flush_counts { unsigned int count; } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; -static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; +static DEFINE_PER_CPU(unsigned short [NR_CPUS], shadow_flush_counts) ____cacheline_aligned; #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index e456f062f241..ece1bf994499 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); -- cgit v1.2.3 From b9bf3121af348d9255f1c917830fe8c2df52efcb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:47 +0900 Subject: percpu: use DEFINE_PER_CPU_SHARED_ALIGNED() There are a few places where ___cacheline_aligned* is used with DEFINE_PER_CPU(). Use DEFINE_PER_CPU_SHARED_ALIGNED() instead. DEFINE_PER_CPU_SHARED_ALIGNED() applies alignment only on SMPs. While all other converted places used _in_smp variant or only get compiled for SMP, net/rds used unconditional ____cacheline_aligned. I don't see any reason these data structures should be aligned on UP and thus converted together. 
Signed-off-by: Tejun Heo Cc: Mike Frysinger Cc: Tony Luck Cc: Andy Grover --- arch/ia64/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 94cf78ba28fa..93ebfea43c6c 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -58,7 +58,8 @@ static struct local_tlb_flush_counts { unsigned int count; } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; -static DEFINE_PER_CPU(unsigned short [NR_CPUS], shadow_flush_counts) ____cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], + shadow_flush_counts); #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 -- cgit v1.2.3 From 6ed106549d17474ca17a16057f4c0ed4eba5a7ca Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Jun 2009 06:03:08 +0000 Subject: net: use NETDEV_TX_OK instead of 0 in ndo_start_xmit() functions This patch is the result of an automatic spatch transformation to convert all ndo_start_xmit() return values of 0 to NETDEV_TX_OK. Some occurrences are missed by the automatic conversion; those will be handled in a separate patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- arch/ia64/hp/sim/simeth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index e4d8fde68103..7e81966ce481 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -412,7 +412,7 @@ simeth_tx(struct sk_buff *skb, struct net_device *dev) */ dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } static inline struct sk_buff * -- cgit v1.2.3 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. 
This led to linker script for each arch implementing its own /DISCARD/ definition, which makes maintaining tedious and adding new entries error-prone. This patch makes all linker scripts to move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then include the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 13d958975874..eb4214d1c5af 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,15 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -317,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . 
*/ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } -- cgit v1.2.3 From 872fb6dd6b07986417964e089074e7acfd025f4c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 13 Jul 2009 13:09:43 -0700 Subject: ia64: Fix setup_per_cpu_areas() compilation error Fix ia64 build setup_per_cpu_areas() redefinition issue in UP configuration. When compiling ia64 kernel in UP configuration, the following compilation errors are reported: arch/ia64/kernel/setup.c:860: error: redefinition of 'setup_per_cpu_areas' include/linux/percpu.h:185: error: previous definition of 'setup_per_cpu_areas' was here The patch fixes the issue in arch/ia64/kernel/setup.c Signed-off-by: Fenghua Yu Signed-off-by: Tejun Heo --- arch/ia64/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 1b23ec126b63..1de86c96801d 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -855,11 +855,17 @@ identify_cpu (struct cpuinfo_ia64 *c) c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); } +/* + * In UP configuration, setup_per_cpu_areas() is defined in + * include/linux/percpu.h + */ +#ifdef CONFIG_SMP void __init setup_per_cpu_areas (void) { /* start_kernel() requires this... */ } +#endif /* * Do the following calculations: -- cgit v1.2.3 From d7aacaddcac3971e33cf52d7e610c06696cb347f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:21:31 +0200 Subject: Driver Core: Add platform device arch data V3 Allow architecture specific data in struct platform_device V3. With this patch struct pdev_archdata is added to struct platform_device, similar to struct dev_archdata found in struct device. Useful for architecture code that needs to keep extra data associated with each platform device. Struct pdev_archdata is different from dev.platform_data, the convention is that dev.platform_data points to driver-specific data. 
It may or may not be required by the driver. The format of this depends on driver but is the same across architectures. The structure pdev_archdata is a place for architecture specific data. This data is handled by architecture specific code (for example runtime PM), and since it is architecture specific it should _never_ be touched by device driver code. Exactly like struct dev_archdata but for platform devices. [rjw: This change is for power management mostly and that's why it goes through the suspend tree.] Signed-off-by: Magnus Damm Acked-by: Kevin Hilman Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- arch/ia64/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index 41ab85d66f33..d66d446b127c 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -15,4 +15,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_IA64_DEVICE_H */ -- cgit v1.2.3 From a0b00ca84b3ecb9eebd62ad34880d8cc0d988c8a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:56 +0900 Subject: ia64: add dma_capable() to replace is_buffer_dma_capable() dma_capable() eventually replaces is_buffer_dma_capable(), which tells if a memory area is dma-capable or not. The problem of is_buffer_dma_capable() is that it doesn't take a pointer to struct device so it doesn't work for POWERPC. 
Signed-off-by: FUJITA Tomonori --- arch/ia64/include/asm/dma-mapping.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 5a61b5c2e18f..88d0f860394d 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -69,6 +69,14 @@ dma_set_mask (struct device *dev, u64 mask) return 0; } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + extern int dma_get_cache_alignment(void); static inline void -- cgit v1.2.3 From 8d4f5339d1ee4027c07e6b2a1cfa9dc41b0d383b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:01 +0900 Subject: x86, IA64, powerpc: add phys_to_dma() and dma_to_phys() This adds two functions, phys_to_dma() and dma_to_phys() to x86, IA64 and powerpc. swiotlb uses them. phys_to_dma() converts a physical address to a dma address. dma_to_phys() does the opposite. 
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/ia64/include/asm/dma-mapping.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 88d0f860394d..f91829de329f 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -77,6 +77,16 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size <= *dev->dma_mask; } +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + extern int dma_get_cache_alignment(void); static inline void -- cgit v1.2.3 From 6a12235c7d2d75c7d94b9afcaaecd422ff845ce0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 10:25:58 +0100 Subject: agp: kill phys_to_gart() and gart_to_phys() There seems to be no reason for these -- they're a 1:1 mapping on all platforms. Signed-off-by: David Woodhouse --- arch/ia64/include/asm/agp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/agp.h b/arch/ia64/include/asm/agp.h index c11fdd8ab4d7..01d09c401c5c 100644 --- a/arch/ia64/include/asm/agp.h +++ b/arch/ia64/include/asm/agp.h @@ -17,10 +17,6 @@ #define unmap_page_from_agp(page) /* nothing */ #define flush_agp_cache() mb() -/* Convert a physical address to an address suitable for the GART. */ -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) - /* GATT allocation. Returns/accepts GATT kernel virtual address. 
*/ #define alloc_gatt_pages(order) \ ((char *)__get_free_pages(GFP_KERNEL, (order))) -- cgit v1.2.3 From 49c794e94649020248e37b78db16cd25bad38b4f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Aug 2009 07:28:28 +0000 Subject: net: implement a SO_PROTOCOL getsockoption Similar to SO_TYPE returning the socket type, SO_PROTOCOL allows to retrieve the protocol used with a given socket. I am not quite sure why we have that-many copies of socket.h, and why the values are not the same on all arches either, but for where hex numbers dominate, I use 0x1029 for SO_PROTOCOL as that seems to be the next free unused number across a bunch of operating systems, or so Google results make me want to believe. SO_PROTOCOL for others just uses the next free Linux number, 38. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- arch/ia64/include/asm/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 745421225ec6..091cd9d47d0f 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -66,4 +66,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_IA64_SOCKET_H */ -- cgit v1.2.3 From 0d6038ee76f2e06b79d0465807f67e86bf4025de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Aug 2009 07:28:29 +0000 Subject: net: implement a SO_DOMAIN getsockoption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sockopt goes in line with SO_TYPE and SO_PROTOCOL. It makes it possible for userspace programs to pass around file descriptors — I am referring to arguments-to-functions, but it may even work for the fd passing over UNIX sockets — without needing to also pass the auxiliary information (PF_INET6/IPPROTO_TCP). Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- arch/ia64/include/asm/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 091cd9d47d0f..0b0d5ff062e5 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -67,5 +67,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_IA64_SOCKET_H */ -- cgit v1.2.3 From be02ff9940c0106dea1470462401a07c5d52e086 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 10 Aug 2009 11:53:11 +0900 Subject: IA64: Remove NULL flush_write_buffers flush_write_buffers() in dma-mapping-common.h was removed so we can remove NULL flush_write_buffers() in IA64. Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: davem@davemloft.net LKML-Reference: <1249872797-1314-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/dma-mapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index f91829de329f..8d3c79cd81e7 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -44,7 +44,6 @@ static inline void dma_free_coherent(struct device *dev, size_t size, #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define get_dma_ops(dev) platform_dma_get_ops(dev) -#define flush_write_buffers() #include -- cgit v1.2.3 From a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:51 -0700 Subject: rcu: Simplify rcu_pending()/rcu_check_callbacks() API All calls from outside RCU are of the form: if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user); This is silly, instead we put a call to rcu_pending() in rcu_check_callbacks(), and then make the outside calls be to rcu_check_callbacks(). 
This cuts down on the code a bit and also gives the compiler a better chance of optimizing. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461311-git-send-email-> Signed-off-by: Ingo Molnar --- arch/ia64/xen/time.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb8332690179..dbeadb9c8e20 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -133,8 +133,7 @@ consider_steal_time(unsigned long new_itm) account_idle_ticks(blocked); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/ia64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + config AUDIT_ARCH bool default y -- cgit v1.2.3 From ee18d64c1f632043a02e6f5ba5e045bb26a5465f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Sep 2009 09:14:21 +0100 Subject: KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. 
This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. 
Signed-off-by: David Howells Signed-off-by: James Morris --- arch/ia64/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 5d7c0e5b9e76..89969e950045 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -192,6 +192,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(&scr->pt); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } /* copy user rbs to kernel rbs */ -- cgit v1.2.3 From f2486f26692433ba27cc10991a085b503b0422a3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Mon, 31 Aug 2009 16:54:03 -0700 Subject: [IA64] Fix warning in dma-mapping.c arch/ia64/kernel/dma-mapping.c:14: warning: control reaches end of non-void function arch/ia64/kernel/dma-mapping.c:14: warning: no return statement in function returning non-void This warning was introduced by commit: 390bd132b2831a2ad0268e84bffbfc0680debfe5 Add dma_debug_init() for ia64 Signed-off-by: Tony Luck --- arch/ia64/kernel/dma-mapping.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 39a3cd0a4173..f2c1600da097 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -10,7 +10,9 @@ EXPORT_SYMBOL(dma_ops); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; } fs_initcall(dma_init); -- cgit v1.2.3 From 5afe18d2f58812f3924edbd215464e5e3e8545e7 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 2 Sep 2009 11:00:46 +0200 Subject: [IA64] fix csum_ipv6_magic() The 32-bit parameters (len and csum) of csum_ipv6_magic() are passed in 64-bit registers in2 and in4. 
The high order 32 bits of the registers were never cleared, and garbage was sometimes calculated into the checksum. Fix this by clearing the high order 32 bits of these registers. Signed-off-by: Jiri Bohac Signed-off-by: Tony Luck --- arch/ia64/lib/ip_fast_csum.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 1f86aeb2c948..620d9dc5220f 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -96,20 +96,22 @@ END(ip_fast_csum) GLOBAL_ENTRY(csum_ipv6_magic) ld4 r20=[in0],4 ld4 r21=[in1],4 - dep r15=in3,in2,32,16 + zxt4 in2=in2 ;; ld4 r22=[in0],4 ld4 r23=[in1],4 - mux1 r15=r15,@rev + dep r15=in3,in2,32,16 ;; ld4 r24=[in0],4 ld4 r25=[in1],4 - shr.u r15=r15,16 + mux1 r15=r15,@rev add r16=r20,r21 add r17=r22,r23 + zxt4 in4=in4 ;; ld4 r26=[in0],4 ld4 r27=[in1],4 + shr.u r15=r15,16 add r18=r24,r25 add r8=r16,r17 ;; -- cgit v1.2.3 From a7db50405216610c8a0d62b8b400180b6f366733 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 22 Jun 2009 08:08:07 -0600 Subject: PCI: remove pcibios_scan_all_fns() This was #define'd as 0 on all platforms, so let's get rid of it. This change makes pci_scan_slot() slightly easier to read. Cc: Yoshinori Sato Cc: Tony Luck Cc: David Howells Cc: "David S. Miller" Cc: Jeff Dike Cc: Ingo Molnar Cc: Ivan Kokshaysky Reviewed-by: Matthew Wilcox Acked-by: Russell King Acked-by: Ralf Baechle Acked-by: Kyle McMartin Acked-by: Benjamin Herrenschmidt Acked-by: Paul Mundt Acked-by: Arnd Bergmann Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- arch/ia64/include/asm/pci.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index fcfca56bb850..55281aabe5f2 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -17,7 +17,6 @@ * loader. 
*/ #define pcibios_assign_all_busses() 0 -#define pcibios_scan_all_fns(a, b) 0 #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -135,7 +134,18 @@ extern void pcibios_resource_to_bus(struct pci_dev *dev, extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, struct pci_bus_region *region); -#define pcibios_scan_all_fns(a, b) 0 +static inline struct resource * +pcibios_select_root(struct pci_dev *pdev, struct resource *res) +{ + struct resource *root = NULL; + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + + return root; +} #define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) -- cgit v1.2.3 From 0ba12d10817a8db1fd7d96d3283ec6c0b294aeab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 May 2009 16:45:19 +0300 Subject: KVM: Move common KVM Kconfig items to new file virt/kvm/Kconfig Reduce Kconfig code duplication. Signed-off-by: Avi Kivity --- arch/ia64/kvm/Kconfig | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 64d520937874..f922bbba3797 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -1,12 +1,8 @@ # # KVM configuration # -config HAVE_KVM - bool -config HAVE_KVM_IRQCHIP - bool - default y +source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -28,6 +24,7 @@ config KVM depends on PCI select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_IRQCHIP ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent -- cgit v1.2.3 From 3032b925f00ba2653f7695d356d6f8284c82038d Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 25 May 2009 10:22:17 +0200 Subject: KVM: ia64: Correct itc_offset calculations Init the itc_offset for all possible vCPUs. 
The current code by mistake ends up only initializing the offset on vCPU 0. Spotted by Gleb Natapov. Signed-off-by: Jes Sorensen Acked-by : Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 80c57b0a21c4..319922137fdd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1224,7 +1224,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /*Initialize itc offset for vcpus*/ itc_offset = 0UL - kvm_get_itc(vcpu); - for (i = 0; i < kvm->arch.online_vcpus; i++) { + for (i = 0; i < KVM_MAX_VCPUS; i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); v->arch.itc_offset = itc_offset; -- cgit v1.2.3 From fa40a8214bb9bcae8d49c234c19d8b4a6c1f37ff Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 4 Jun 2009 15:08:24 -0300 Subject: KVM: switch irq injection/acking data structures to irq_lock Protect irq injection/acking data structures with a separate irq_lock mutex. 
This fixes the following deadlock: CPU A CPU B kvm_vm_ioctl_deassign_dev_irq() mutex_lock(&kvm->lock); worker_thread() -> kvm_deassign_irq() -> kvm_assigned_dev_interrupt_work_handler() -> deassign_host_irq() mutex_lock(&kvm->lock); -> cancel_work_sync() [blocked] [gleb: fix ia64 path] Reported-by: Alex Williamson Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 319922137fdd..8dde36953af3 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1000,10 +1000,10 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, &irq_event, -- cgit v1.2.3 From c5af89b68abb26eea5e745f33228f4d672f115e5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:26 +0300 Subject: KVM: Introduce kvm_vcpu_is_bsp() function. Use it instead of open code "vcpu_id zero is BSP" assumption. 
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/ia64/kvm/vcpu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 8dde36953af3..4082665ace0a 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1216,7 +1216,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (IS_ERR(vmm_vcpu)) return PTR_ERR(vmm_vcpu); - if (vcpu->vcpu_id == 0) { + if (kvm_vcpu_is_bsp(vcpu)) { vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; /*Set entry address for first run.*/ diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index cc406d064a09..61a3320b62c1 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -830,7 +830,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) kvm = (struct kvm *)KVM_VM_BASE; - if (vcpu->vcpu_id == 0) { + if (kvm_vcpu_is_bsp(vcpu)) { for (i = 0; i < kvm->arch.online_vcpus; i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); -- cgit v1.2.3 From 73880c80aa9c8dc353cd0ad26579023213cd5314 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:28 +0300 Subject: KVM: Break dependency between vcpu index in vcpus array and vcpu_id. Archs are free to use vcpu_id as they see fit. For x86 it is used as vcpu's apic id. New ioctl is added to configure boot vcpu id that was assumed to be 0 till now. 
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 1 - arch/ia64/kvm/Kconfig | 1 + arch/ia64/kvm/kvm-ia64.c | 8 ++------ arch/ia64/kvm/vcpu.c | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 5f43697aed30..9cf1c4b1f92f 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -465,7 +465,6 @@ struct kvm_arch { unsigned long metaphysical_rr4; unsigned long vmm_init_rr; - int online_vcpus; int is_sn2; struct kvm_ioapic *vioapic; diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index f922bbba3797..cbadd8a65233 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select KVM_APIC_ARCHITECTURE ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. 
You will need a fairly recent diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4082665ace0a..d1f7bcda2c7f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -338,7 +338,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, union ia64_lid lid; int i; - for (i = 0; i < kvm->arch.online_vcpus; i++) { + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { if (kvm->vcpus[i]) { lid.val = VCPU_LID(kvm->vcpus[i]); if (lid.id == id && lid.eid == eid) @@ -412,7 +412,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) call_data.ptc_g_data = p->u.ptc_g_data; - for (i = 0; i < kvm->arch.online_vcpus; i++) { + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || vcpu == kvm->vcpus[i]) @@ -852,8 +852,6 @@ struct kvm *kvm_arch_create_vm(void) kvm_init_vm(kvm); - kvm->arch.online_vcpus = 0; - return kvm; } @@ -1356,8 +1354,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, goto fail; } - kvm->arch.online_vcpus++; - return vcpu; fail: return ERR_PTR(r); diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index 61a3320b62c1..dce75b70cdd5 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -831,7 +831,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) kvm = (struct kvm *)KVM_VM_BASE; if (kvm_vcpu_is_bsp(vcpu)) { - for (i = 0; i < kvm->arch.online_vcpus; i++) { + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); VMX(v, itc_offset) = itc_offset; -- cgit v1.2.3 From 988a2cae6a3c0dea6df59808a935a9a697bfc28c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Jun 2009 15:56:29 +0300 Subject: KVM: Use macro to iterate over vcpus. 
[christian: remove unused variables on s390] Signed-off-by: Gleb Natapov Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d1f7bcda2c7f..5c766bd82b05 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -337,13 +337,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, { union ia64_lid lid; int i; + struct kvm_vcpu *vcpu; - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { - if (kvm->vcpus[i]) { - lid.val = VCPU_LID(kvm->vcpus[i]); - if (lid.id == id && lid.eid == eid) - return kvm->vcpus[i]; - } + kvm_for_each_vcpu(i, vcpu, kvm) { + lid.val = VCPU_LID(vcpu); + if (lid.id == id && lid.eid == eid) + return vcpu; } return NULL; @@ -409,21 +408,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct kvm *kvm = vcpu->kvm; struct call_data call_data; int i; + struct kvm_vcpu *vcpui; call_data.ptc_g_data = p->u.ptc_g_data; - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) { - if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == - KVM_MP_STATE_UNINITIALIZED || - vcpu == kvm->vcpus[i]) + kvm_for_each_vcpu(i, vcpui, kvm) { + if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || + vcpu == vcpui) continue; - if (waitqueue_active(&kvm->vcpus[i]->wq)) - wake_up_interruptible(&kvm->vcpus[i]->wq); + if (waitqueue_active(&vcpui->wq)) + wake_up_interruptible(&vcpui->wq); - if (kvm->vcpus[i]->cpu != -1) { - call_data.vcpu = kvm->vcpus[i]; - smp_call_function_single(kvm->vcpus[i]->cpu, + if (vcpui->cpu != -1) { + call_data.vcpu = vcpui; + smp_call_function_single(vcpui->cpu, vcpu_global_purge, &call_data, 1); } else printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); -- cgit v1.2.3 From ec04b2604c3707a46db1d26d98f82b11d0844669 Mon Sep 17 00:00:00 2001 From: Joerg 
Roedel Date: Fri, 19 Jun 2009 15:16:23 +0200 Subject: KVM: Prepare memslot data structures for multiple hugepage sizes [avi: fix build on non-x86] Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 9cf1c4b1f92f..d9b6325a9328 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -235,7 +235,8 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -#define KVM_PAGES_PER_HPAGE 1 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) 1 struct kvm; struct kvm_vcpu; -- cgit v1.2.3 From 2023a29cbe34139afcea8f65f8aef78c325c5dc0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 18 Jun 2009 11:47:28 -0300 Subject: KVM: remove old KVMTRACE support code Return EOPNOTSUPP for KVM_TRACE_ENABLE/PAUSE/DISABLE ioctls. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index cbadd8a65233..ef3e7be29caf 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -47,9 +47,6 @@ config KVM_INTEL Provides support for KVM on Itanium 2 processors equipped with the VT extensions. -config KVM_TRACE - bool - source drivers/virtio/Kconfig endif # VIRTUALIZATION -- cgit v1.2.3 From bda9020e2463ec94db9f97e8615f3bae22069838 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Jun 2009 22:24:32 +0300 Subject: KVM: remove in_range from io devices This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write functions. in_range now becomes unused so it is removed from device ops in favor of read/write callbacks performing range checks internally. 
This allows aliasing (mostly for in-kernel virtio), as well as better error handling by making it possible to pass errors up to userspace. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5c766bd82b05..d7aa6bb8f477 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext) } -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr, int len, int is_write) -{ - struct kvm_io_device *dev; - - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write); - - return dev; -} - static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { kvm_run->exit_reason = KVM_EXIT_UNKNOWN; @@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct kvm_mmio_req *p; struct kvm_io_device *mmio_dev; + int r; p = kvm_get_vcpu_ioreq(vcpu); @@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; mmio: - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir); - if (mmio_dev) { - if (!p->dir) - kvm_iodevice_write(mmio_dev, p->addr, p->size, - &p->data); - else - kvm_iodevice_read(mmio_dev, p->addr, p->size, - &p->data); - - } else + if (p->dir) + r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr, + p->size, &p->data); + else + r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr, + p->size, &p->data); + if (r) printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); p->state = STATE_IORESP_READY; -- cgit v1.2.3 From a1b37100d9e29c1f8dc3e2f5490a205c80180e01 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 9 Jul 2009 15:33:52 +0300 Subject: KVM: Reduce runnability interface with arch support code Remove kvm_cpu_has_interrupt() and kvm_arch_interrupt_allowed() from interface between general code and arch code. kvm_arch_vcpu_runnable() checks for interrupts instead. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d7aa6bb8f477..0ad09f05efa9 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1935,19 +1935,6 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) return find_highest_bits((int *)&vpd->irr[0]); } -int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) -{ - if (kvm_highest_pending_irq(vcpu) != -1) - return 1; - return 0; -} - -int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) -{ - /* do real check here */ - return 1; -} - int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return vcpu->arch.timer_fired; @@ -1960,7 +1947,8 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || + (kvm_highest_pending_irq(vcpu) != -1); } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 27c238106862fb0dd3e229c48c1ef56502b0ec88 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Aug 2009 15:31:11 +0300 Subject: KVM: Add __KERNEL__ guards to exported headers Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_para.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h index 0d6d8ca07b8c..1588aee781a2 100644 
--- a/arch/ia64/include/asm/kvm_para.h +++ b/arch/ia64/include/asm/kvm_para.h @@ -19,9 +19,13 @@ * */ +#ifdef __KERNEL__ + static inline unsigned int kvm_arch_para_features(void) { return 0; } #endif + +#endif -- cgit v1.2.3 From 4295ab34883d2070b1145e14f4619478e9788807 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:56 -0700 Subject: [IA64] kdump: Mask MCA/INIT on frozen cpus Summary: INIT asserted on kdump kernel invokes the INIT handler not only on the cpu that is running the kdump kernel, but also on the BSP of the panicked kernel, because the (badly) frozen BSP can be thawed by INIT. Description: The kdump_cpu_freeze() is called on all cpus except the one that initiates panic and/or kdump, to stop/offline the cpu (on ia64, it means we pass control of cpus to SAL, or put them in a spinloop). Note that CPU0(BSP) always goes to the spinloop, so if a panic happened on an AP, there are at least 2 cpus (= the AP and BSP) which do not go back to SAL. On the spinning cpus, interrupts are disabled (rsm psr.i), but INIT can still interrupt them because psr.mc, which masks it, is not set unless kdump_cpu_freeze() is called from MCA/INIT context. Therefore, assume that a panic happened on an AP, kdump was invoked, new INIT handlers for the kdump kernel were registered and then an INIT is asserted. From the viewpoint of SAL, there are 2 online cpus, so INIT will be delivered to both of them. It likely means that not only the AP (= a cpu executing kdump) enters the newly registered INIT handler, but also the BSP (= another cpu spinning in the panicked kernel) enters the same INIT handler. Of course the register settings on the BSP are still old (for the panicked kernel), so what happens when the handler runs with the wrong settings is unpredictable. I believe this is not desirable behavior. How to Reproduce: Start kdump on one of APs (e.g.
cpu1) # taskset 0x2 echo c > /proc/sysrq-trigger Then assert INIT after the kdump kernel has booted and after the new INIT handler for the kdump kernel is registered. Expected results: An INIT handler is invoked only on the AP. Actual results: An INIT handler is invoked on the AP and BSP. Sample of results: I got the following console log by asserting INIT after the prompt "root:/>". It seems that two monarchs appeared from one INIT, and one of them finally panicked. It also seems that the panicked one assumed there were 4 online cpus and that none of them reached the rendezvous: : [ 0 %]dropping to initramfs shell exiting this shell will reboot your system root:/> Entered OS INIT handler. PSP=fff301a0 cpu=0 monarch=0 ia64_init_handler: Promoting cpu 0 to monarch. Delaying for 5 seconds... All OS INIT slaves have reached rendezvous Processes interrupted by INIT - 0 (cpu 0 task 0xa000000100af0000) : <> : Entered OS INIT handler. PSP=fff301a0 cpu=0 monarch=1 Delaying for 5 seconds... mlogbuf_finish: printing switched to urgent mode, MCA/INIT might be dodgy or fail. OS INIT slave did not rendezvous on cpu 1 2 3 INIT swapper 0[0]: bugcheck! 0 [1] : <> : Kernel panic - not syncing: Attempted to kill the idle task! Proposed fix: To avoid this problem, this patch inserts ia64_set_psr_mc() to mask INIT on cpus that are going to be frozen. This masking has no effect if kdump_cpu_freeze() is called from the INIT handler when kdump_on_init == 1, because psr.mc is already set to 1 before entering OS_INIT. I confirmed that weird logs like the one above no longer appear after applying this patch.
Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/include/asm/mca.h | 1 + arch/ia64/kernel/crash.c | 4 ++++ arch/ia64/kernel/head.S | 2 +- arch/ia64/kernel/mca_asm.S | 27 +++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h index 44a0b53df900..cb0952f51836 100644 --- a/arch/ia64/include/asm/mca.h +++ b/arch/ia64/include/asm/mca.h @@ -151,6 +151,7 @@ extern void ia64_mca_cmc_vector_setup(void); extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)); extern void ia64_unreg_MCA_extension(void); extern unsigned long ia64_get_rnat(unsigned long *); +extern void ia64_set_psr_mc(void); extern void ia64_mca_printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index f065093f8e9b..3f3a5797d198 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -129,10 +129,14 @@ void kdump_cpu_freeze(struct unw_frame_info *info, void *arg) { int cpuid; + local_irq_disable(); cpuid = smp_processor_id(); crash_save_this_cpu(); current->thread.ksp = (__u64)info->sw - 16; + + ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */ + atomic_inc(&kdump_cpu_frozen); kdump_status[cpuid] = 1; mb(); diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 23f846de62d5..e1f97ac9eefd 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1242,7 +1242,7 @@ GLOBAL_ENTRY(ia64_jump_to_sal) movl r16=SAL_PSR_BITS_TO_SET;; mov cr.ipsr=r16 mov cr.ifs=r0;; - rfi;; + rfi;; // note: this unmask MCA/INIT (psr.mc) 1: /* * Invalidate all TLB data/inst diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index a06d46548ff9..8d2eabe3119f 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -1073,3 
+1073,30 @@ GLOBAL_ENTRY(ia64_get_rnat) mov ar.rsc=3 br.ret.sptk.many rp END(ia64_get_rnat) + + +// void ia64_set_psr_mc(void) +// +// Set psr.mc bit to mask MCA/INIT. +GLOBAL_ENTRY(ia64_set_psr_mc) + rsm psr.i | psr.ic // disable interrupts + ;; + srlz.d + ;; + mov r14 = psr // get psr{36:35,31:0} + movl r15 = 1f + ;; + dep r14 = -1, r14, PSR_MC, 1 // set psr.mc + ;; + dep r14 = -1, r14, PSR_IC, 1 // set psr.ic + ;; + dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use + ;; + mov cr.ipsr = r14 + mov cr.ifs = r0 + mov cr.iip = r15 + ;; + rfi +1: + br.ret.sptk.many rp +END(ia64_set_psr_mc) -- cgit v1.2.3 From 07a6a4ae827b54cec4c1b1d92bed1cc9176b45ec Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:56 -0700 Subject: [IA64] kexec: Make INIT safe while transition to kdump/kexec kernel Summary: Asserting INIT on the beginning of kdump/kexec kernel will result in unexpected behavior because INIT handler for previous kernel is invoked on new kernel. Description: In panic situation, we can receive INIT while kernel transition, i.e. from beginning of panic to bootstrap of kdump kernel. Since we initialize registers on leave from current kernel, no longer monarch/slave handlers of current kernel in virtual mode are called safely. (In fact system goes hang as far as I confirmed) How to Reproduce: Start kdump # echo c > /proc/sysrq-trigger Then assert INIT while kdump kernel is booting, before new INIT handler for kdump kernel is registered. Expected(Desirable) result: kdump kernel boots without any problem, crashdump retrieved Actual result: INIT handler for previous kernel is invoked on kdump kernel => panic, hang etc. (unexpected) Proposed fix: We can unregister these init handlers from SAL before jumping into new kernel, however then the INIT will fallback to default behavior, result in warmboot by SAL (according to the SAL specification) and we cannot retrieve the crashdump. 
Therefore this patch introduces a NOP init handler and register it to SAL before leave from current kernel, to start kdump safely by preventing INITs from entering virtual mode and resulting in warmboot. On the other hand, in case of kexec that not for kdump, it also has same problem with INIT while kernel transition. This patch handles this case differently, because for kexec unregistering handlers will be preferred than registering NOP handler, since the situation "no handlers registered" is usual state for kernel's entry. Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/include/asm/mca.h | 1 + arch/ia64/kernel/machine_kexec.c | 12 ++++++++++++ arch/ia64/kernel/mca_asm.S | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h index cb0952f51836..c171cdf0a789 100644 --- a/arch/ia64/include/asm/mca.h +++ b/arch/ia64/include/asm/mca.h @@ -145,6 +145,7 @@ extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *); extern void ia64_init_handler(struct pt_regs *, struct switch_stack *, struct ia64_sal_os_state *); +extern void ia64_os_init_on_kdump(void); extern void ia64_monarch_init_handler(void); extern void ia64_slave_init_handler(void); extern void ia64_mca_cmc_vector_setup(void); diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 0823de1f6ebe..571d66373e0a 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include typedef NORET_TYPE void (*relocate_new_kernel_t)( unsigned long indirection_page, @@ -85,11 +87,21 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) void *pal_addr = efi_get_pal_addr(); unsigned long code_addr = (unsigned long)page_address(image->control_code_page); int ii; + u64 fp, 
gp; + ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump; BUG_ON(!image); if (image->type == KEXEC_TYPE_CRASH) { crash_save_this_cpu(); current->thread.ksp = (__u64)info->sw - 16; + + /* Register noop init handler */ + fp = ia64_tpa(init_handler->fp); + gp = ia64_tpa(ia64_getreg(_IA64_REG_GP)); + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0); + } else { + /* Unregister init handlers of current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); } /* Interrupts aren't acceptable while we reboot */ diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 8d2eabe3119f..7461d2573d41 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -40,6 +40,7 @@ .global ia64_do_tlb_purge .global ia64_os_mca_dispatch + .global ia64_os_init_on_kdump .global ia64_os_init_dispatch_monarch .global ia64_os_init_dispatch_slave @@ -298,6 +299,25 @@ END(ia64_os_mca_virtual_begin) //StartMain//////////////////////////////////////////////////////////////////// +// +// NOP init handler for kdump. In panic situation, we may receive INIT +// while kernel transition. Since we initialize registers on leave from +// current kernel, no longer monarch/slave handlers of current kernel in +// virtual mode are called safely. +// We can unregister these init handlers from SAL, however then the INIT +// will result in warmboot by SAL and we cannot retrieve the crashdump. +// Therefore register this NOP function to SAL, to prevent entering virtual +// mode and resulting warmboot by SAL. +// +ia64_os_init_on_kdump: + mov r8=r0 // IA64_INIT_RESUME + mov r9=r10 // SAL_GP + mov r22=r17 // *minstate + ;; + mov r10=r0 // return to same context + mov b0=r12 // SAL_CHECK return address + br b0 + // // SAL to OS entry point for INIT on all processors. This has been defined for // registration purposes with SAL as a part of ia64_mca_init. 
Monarch and -- cgit v1.2.3 From 6cc3efcdf01cf874ffe770919395918a3ee9365b Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:57 -0700 Subject: [IA64] kexec: Unregister MCA handler before kexec Summary: MCA on the beginning of kdump/kexec kernel will result in unexpected behavior because MCA handler for previous kernel is invoked on the kdump kernel. Description: Once a cpu is passed to new kernel, all resources in previous kernel should not be used from the cpu. Even the resources for MCA handler are no exception. So we cannot handle MCAs and its machine check errors during kernel transition, until new handler for new kernel is registered with new resources ready for handling the MCA. How to reproduce: Assert MCA while kdump kernel is booting, before new MCA handler for kdump kernel is registered. Expected(Desirable) results: No recovery, cancel kdump and reboot the system. Actual results: MCA handler for previous kernel is invoked on the kdump kernel. => panic, hang etc. (unexpected) Proposed fix: To avoid entering MCA handler from early stage of new kernel, unregister the entry point from SAL before leave from current kernel. Then SAL will make all MCAs to warmboot safely, without invoking OS_MCA. 
Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/machine_kexec.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 571d66373e0a..3d3aeef46947 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -104,6 +104,9 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); } + /* Unregister mca handler - No more recovery on current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); -- cgit v1.2.3 From 68cb14c7c46d9204ba451a534f15a8bc12c88e28 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:57 -0700 Subject: [IA64] kdump: Don't return APs to SAL from kdump Summary: Asserting INIT on cpu going to be offline will result in unexpected behavior. It will be a real problem in kdump cases where INIT might be asserted to unstable APs going to be offline by returning to SAL. Description: Since psr.mc is cleared when bits in psr are set to SAL_PSR_BITS_TO_SET in ia64_jump_to_sal(), there is a small window (~few msecs) that the cpu can receive INIT even if the cpu enter there via INIT handler. In this window we do restore of registers for SAL, so INIT asserted here will not work properly. It is hard to remove this window by masking INIT (i.e. setting psr.mc) because we have to unmask it later in OS, because we have to use branch instruction (br.ret, not rfi) to return SAL, due to OS_BOOT_RENDEZ to SAL return convention. I suppose this window will not be a real problem on cpu offline if we can educate people not to push INIT button during hotplug operation. However, only exception is a race in kdump and INIT. 
Now kdump returns APs to SAL before processing the dump, but the kernel might receive an INIT at that point in time. Such an INIT might be asserted by kdump itself if an AP doesn't react to the IPI soon enough and kdump decides to use INIT to stop the AP. Or it might be asserted by an operator or an external agent to start a dump on the unstable system. Such panic+INIT or INIT+INIT cases should be rare, but it would be good if we could retrieve the crashdump even in such cases. How to reproduce: panic+INIT or INIT+INIT, with kdump configured Expected results: crashdump is retrieved anyway Actual results: panic, hang etc. (unexpected) Proposed fix: To avoid the window on the way to SAL, this patch stops returning APs to SAL in the kdump case. In other words, this patch makes APs spin in the OS instead of spinning in SAL. (* Note: What would the impact be? If a cpu is spinning in SAL, the cpu is in the BOOT_RENDEZ loop, the same as an offlined cpu. In theory, if an INIT is asserted there, cpus in the BOOT_RENDEZ loop should not invoke OS_INIT. So either way, no matter where the cpu is actually spinning, once the cpu starts to spin and acts as "frozen," an INIT on the cpu has no effect. From another point of view, all debug information on the cpu should have been stored to memory before the cpu starts to be frozen. So no more action on the cpu is required.) I confirmed that kdump sometimes hangs on concurrent INITs (another INIT after an INIT), and that it doesn't hang after applying this patch.
Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 3f3a5797d198..b2a8b3da8af3 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -140,10 +140,6 @@ kdump_cpu_freeze(struct unw_frame_info *info, void *arg) atomic_inc(&kdump_cpu_frozen); kdump_status[cpuid] = 1; mb(); -#ifdef CONFIG_HOTPLUG_CPU - if (cpuid != 0) - ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]); -#endif for (;;) cpu_relax(); } -- cgit v1.2.3 From 1726b0883dd08636705ea55d577eb0ec314ba427 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:57 -0700 Subject: [IA64] kdump: Mask INIT first in panic-kdump path Summary: Asserting INIT might block kdump if the system is already going to start kdump via panic. Description: INIT can interrupt anywhere in panic path, so it can interrupt in middle of kdump kicked by panic. Therefore there is a race if kdump is kicked concurrently, via Panic and via INIT. INIT could fail to invoke kdump if the system is already going to start kdump via panic. It could not restart kdump from INIT handler if some of cpus are already playing dead with INIT masked. It also means that INIT could block kdump's progress if no monarch is entered in the INIT rendezvous. Panic+INIT is a rare, but possible situation since it can be assumed that the kernel or an internal agent decides to panic the unstable system while another external agent decides to send an INIT to the system at same time. 
How to reproduce: Assert INIT just after panic, before all other cpus have frozen Expected results: continue kdump invoked by panic, or restart kdump from INIT Actual results: might hang, crashdump not retrieved Proposed Fix: This patch masks INIT first in the panic path to take the initiative on kdump, and reuses the atomic value kdump_in_progress to make sure there is only one initiator of kdump. All INITs asserted later should be used only for freezing all other cpus. This mask will be removed soon by the rfi in relocate_kernel.S, before jumping into the kdump kernel, after all cpus are frozen and the no-op INIT handler is registered. So if an INIT is asserted during the interval while it is masked, it will pend on the system, will be received just after the rfi, and will be handled by the no-op handler. If there is an MCA event while psr.mc is 1, in theory the event will pend on the system and will be received just after the rfi, same as above. The MCA handler is already unregistered at that time, so the received MCA will not reach OS_MCA and will result in a warmboot by SAL. Note that the code in this masked interval is relatively simpler than that in the MCA/INIT handler, which is also executed with the mask. So it can be said that the probability of an error in this interval should not be much higher than that in the MCA/INIT handler.
Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 47 +++++++++++++++++++++++++++++++++----- arch/ia64/kernel/relocate_kernel.S | 2 +- 2 files changed, 42 insertions(+), 7 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index b2a8b3da8af3..9c851b73f276 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -23,6 +23,7 @@ int kdump_status[NR_CPUS]; static atomic_t kdump_cpu_frozen; atomic_t kdump_in_progress; +static int kdump_freeze_monarch; static int kdump_on_init = 1; static int kdump_on_fatal_mca = 1; @@ -108,6 +109,33 @@ machine_crash_shutdown(struct pt_regs *pt) */ kexec_disable_iosapic(); #ifdef CONFIG_SMP + /* + * If kdump_on_init is set and an INIT is asserted here, kdump will + * be started again via INIT monarch. + */ + local_irq_disable(); + ia64_set_psr_mc(); /* mask MCA/INIT */ + if (atomic_inc_return(&kdump_in_progress) != 1) + unw_init_running(kdump_cpu_freeze, NULL); + + /* + * Now this cpu is ready for kdump. + * Stop all others by IPI or INIT. They could receive INIT from + * outside and might be INIT monarch, but only thing they have to + * do is falling into kdump_cpu_freeze(). + * + * If an INIT is asserted here: + * - All receivers might be slaves, since some of cpus could already + * be frozen and INIT might be masked on monarch. In this case, + * all slaves will park in while (monarch_cpu == -1) loop before + * DIE_INIT_SLAVE_ENTER that for waiting monarch enters. + * => TBD: freeze all slaves + * - One might be a monarch, but INIT rendezvous will fail since + * at least this cpu already have INIT masked so it never join + * to the rendezvous. In this case, all slaves and monarch will + * be frozen after timeout of the INIT rendezvous. 
+ * => TBD: freeze them without waiting timeout + */ kdump_smp_send_stop(); /* not all cpu response to IPI, send INIT to freeze them */ if (kdump_wait_cpu_freeze() && kdump_on_init) { @@ -177,13 +205,18 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) switch (val) { case DIE_INIT_MONARCH_PROCESS: if (kdump_on_init) { - atomic_set(&kdump_in_progress, 1); + if (atomic_inc_return(&kdump_in_progress) != 1) + kdump_freeze_monarch = 1; *(nd->monarch_cpu) = -1; } break; case DIE_INIT_MONARCH_LEAVE: - if (kdump_on_init) - machine_kdump_on_init(); + if (kdump_on_init) { + if (kdump_freeze_monarch) + unw_init_running(kdump_cpu_freeze, NULL); + else + machine_kdump_on_init(); + } break; case DIE_INIT_SLAVE_LEAVE: if (atomic_read(&kdump_in_progress)) @@ -196,9 +229,11 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) case DIE_MCA_MONARCH_LEAVE: /* *(nd->data) indicate if MCA is recoverable */ if (kdump_on_fatal_mca && !(*(nd->data))) { - atomic_set(&kdump_in_progress, 1); - *(nd->monarch_cpu) = -1; - machine_kdump_on_init(); + if (atomic_inc_return(&kdump_in_progress) == 1) { + *(nd->monarch_cpu) = -1; + machine_kdump_on_init(); + } + /* We got fatal MCA while kdump!? No way!! */ } break; } diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S index 903babd22d62..32f6fc131fbe 100644 --- a/arch/ia64/kernel/relocate_kernel.S +++ b/arch/ia64/kernel/relocate_kernel.S @@ -52,7 +52,7 @@ GLOBAL_ENTRY(relocate_new_kernel) srlz.i ;; mov ar.rnat=r18 - rfi + rfi // note: this unmask MCA/INIT (psr.mc) ;; 1: //physical mode code begin -- cgit v1.2.3 From 5959906ee9dee602a46e49c868a7e543e050d605 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:57 -0700 Subject: [IA64] kdump: Try INIT regardless of kdump_on_init CPUs should be frozen if possible, otherwise it might hinder kdump. 
So if there are CPUs not respond to IPI, try INIT to stop them. Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 9c851b73f276..0995fdc7b299 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -138,8 +138,10 @@ machine_crash_shutdown(struct pt_regs *pt) */ kdump_smp_send_stop(); /* not all cpu response to IPI, send INIT to freeze them */ - if (kdump_wait_cpu_freeze() && kdump_on_init) { + if (kdump_wait_cpu_freeze()) { kdump_smp_send_init(); + /* wait again, don't go ahead if possible */ + kdump_wait_cpu_freeze(); } #endif } @@ -178,6 +180,19 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) struct ia64_mca_notify_die *nd; struct die_args *args = data; + if (atomic_read(&kdump_in_progress)) { + switch (val) { + case DIE_INIT_MONARCH_LEAVE: + if (!kdump_freeze_monarch) + break; + /* fall through */ + case DIE_INIT_SLAVE_LEAVE: + case DIE_MCA_RENDZVOUS_LEAVE: + unw_init_running(kdump_cpu_freeze, NULL); + break; + } + } + if (!kdump_on_init && !kdump_on_fatal_mca) return NOTIFY_DONE; @@ -190,41 +205,25 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) } if (val != DIE_INIT_MONARCH_LEAVE && - val != DIE_INIT_SLAVE_LEAVE && val != DIE_INIT_MONARCH_PROCESS && - val != DIE_MCA_RENDZVOUS_LEAVE && val != DIE_MCA_MONARCH_LEAVE) return NOTIFY_DONE; nd = (struct ia64_mca_notify_die *)args->err; - /* Reason code 1 means machine check rendezvous*/ - if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE - || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1) - return NOTIFY_DONE; switch (val) { case DIE_INIT_MONARCH_PROCESS: - if (kdump_on_init) { + /* Reason code 1 
means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) { if (atomic_inc_return(&kdump_in_progress) != 1) kdump_freeze_monarch = 1; *(nd->monarch_cpu) = -1; } break; case DIE_INIT_MONARCH_LEAVE: - if (kdump_on_init) { - if (kdump_freeze_monarch) - unw_init_running(kdump_cpu_freeze, NULL); - else - machine_kdump_on_init(); - } - break; - case DIE_INIT_SLAVE_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); - break; - case DIE_MCA_RENDZVOUS_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); + /* Reason code 1 means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) + machine_kdump_on_init(); break; case DIE_MCA_MONARCH_LEAVE: /* *(nd->data) indicate if MCA is recoverable */ -- cgit v1.2.3 From 0cced40e7c58b1105aef3ca446da7b158a18a9a6 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto <[seto.hidetoshi@jp.fujitsu.com]> Date: Thu, 6 Aug 2009 14:51:58 -0700 Subject: [IA64] kdump: Short path to freeze CPUs Setting monarch_cpu = -1 to let slaves frozen might not work, because there might be slaves being late, not entered the rendezvous yet. Such slaves might be caught in while (monarch_cpu == -1) loop. Use kdump_in_progress instead of monarch_cpus to break INIT rendezvous and let all slaves enter DIE_INIT_SLAVE_LEAVE smoothly. And monarch no longer need to manage rendezvous if once kdump_in_progress is set, catch the monarch in DIE_INIT_MONARCH_ENTER then. 
Signed-off-by: Hidetoshi Seto Cc: Vivek Goyal Cc: Haren Myneni Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/crash.c | 15 ++++++--------- arch/ia64/kernel/mca.c | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 0995fdc7b299..6631a9dfafdc 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -127,14 +127,13 @@ machine_crash_shutdown(struct pt_regs *pt) * If an INIT is asserted here: * - All receivers might be slaves, since some of cpus could already * be frozen and INIT might be masked on monarch. In this case, - * all slaves will park in while (monarch_cpu == -1) loop before - * DIE_INIT_SLAVE_ENTER that for waiting monarch enters. - * => TBD: freeze all slaves + * all slaves will be frozen soon since kdump_in_progress will let + * them into DIE_INIT_SLAVE_LEAVE. * - One might be a monarch, but INIT rendezvous will fail since * at least this cpu already have INIT masked so it never join * to the rendezvous. In this case, all slaves and monarch will - * be frozen after timeout of the INIT rendezvous. - * => TBD: freeze them without waiting timeout + * be frozen soon with no wait since the INIT rendezvous is skipped + * by kdump_in_progress. 
*/ kdump_smp_send_stop(); /* not all cpu response to IPI, send INIT to freeze them */ @@ -187,6 +186,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) break; /* fall through */ case DIE_INIT_SLAVE_LEAVE: + case DIE_INIT_MONARCH_ENTER: case DIE_MCA_RENDZVOUS_LEAVE: unw_init_running(kdump_cpu_freeze, NULL); break; @@ -217,7 +217,6 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) if (kdump_on_init && (nd->sos->rv_rc != 1)) { if (atomic_inc_return(&kdump_in_progress) != 1) kdump_freeze_monarch = 1; - *(nd->monarch_cpu) = -1; } break; case DIE_INIT_MONARCH_LEAVE: @@ -228,10 +227,8 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) case DIE_MCA_MONARCH_LEAVE: /* *(nd->data) indicate if MCA is recoverable */ if (kdump_on_fatal_mca && !(*(nd->data))) { - if (atomic_inc_return(&kdump_in_progress) == 1) { - *(nd->monarch_cpu) = -1; + if (atomic_inc_return(&kdump_in_progress) == 1) machine_kdump_on_init(); - } /* We got fatal MCA while kdump!? No way!! 
*/ } break; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 7b30d21c5190..d2877a7bfe2e 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1682,14 +1682,25 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if (!sos->monarch) { ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; + +#ifdef CONFIG_KEXEC + while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else while (monarch_cpu == -1) - cpu_relax(); /* spin until monarch enters */ + cpu_relax(); /* spin until monarch enters */ +#endif NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); +#ifdef CONFIG_KEXEC + while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else while (monarch_cpu != -1) - cpu_relax(); /* spin until monarch leaves */ + cpu_relax(); /* spin until monarch leaves */ +#endif NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); -- cgit v1.2.3 From 353f6dd2dec992ddd34620a94b051b0f76227379 Mon Sep 17 00:00:00 2001 From: Anirban Sinha Date: Mon, 14 Sep 2009 11:13:37 -0700 Subject: cleanup console_print() console_print() is an old legacy interface mostly unused in the entire kernel tree. It's best to clean up its existing use and let developers use their own implementation of it as they feel fit. 
Signed-off-by: Anirban Sinha Signed-off-by: Linus Torvalds --- arch/ia64/kernel/head.S | 1 + arch/ia64/kernel/head.h | 1 + arch/ia64/kernel/process.c | 7 +++++++ 3 files changed, 9 insertions(+) create mode 100644 arch/ia64/kernel/head.h (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 23f846de62d5..e6c5c3d5e1f8 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -34,6 +34,7 @@ #include #include #include +#include "head.h" #ifdef CONFIG_HOTPLUG_CPU #define SAL_PSR_BITS_TO_SET \ diff --git a/arch/ia64/kernel/head.h b/arch/ia64/kernel/head.h new file mode 100644 index 000000000000..2e2ac6824e65 --- /dev/null +++ b/arch/ia64/kernel/head.h @@ -0,0 +1 @@ +extern void console_print(const char *s); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 89969e950045..9bcec9945c12 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -161,6 +161,13 @@ show_regs (struct pt_regs *regs) show_stack(NULL, NULL); } +/* local support for deprecated console_print */ +void +console_print(const char *s) +{ + printk(KERN_EMERG "%s", s); +} + void do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) { -- cgit v1.2.3 From c88d5910890ad35af283344417891344604f0438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2009 13:50:02 +0200 Subject: sched: Merge select_task_rq_fair() and sched_balance_self() The problem with wake_idle() is that is doesn't respect things like cpu_power, which means it doesn't deal well with SMT nor the recent RT interaction. To cure this, it needs to do what sched_balance_self() does, which leads to the possibility of merging select_task_rq_fair() and sched_balance_self(). Modify sched_balance_self() to: - update_shares() when walking up the domain tree, (it only called it for the top domain, but it should have done this anyway), which allows us to remove this ugly bit from try_to_wake_up(). 
- do wake_affine() on the smallest domain that contains both this (the waking) and the prev (the wakee) cpu for WAKE invocations. Then use the top-down balance steps it had to replace wake_idle(). This leads to the dissapearance of SD_WAKE_BALANCE and SD_WAKE_IDLE_FAR, with SD_WAKE_IDLE replaced with SD_BALANCE_WAKE. SD_WAKE_AFFINE needs SD_BALANCE_WAKE to be effective. Touch all topology bits to replace the old with new SD flags -- platforms might need re-tuning, enabling SD_BALANCE_WAKE conditionally on a NUMA distance seems like a good additional feature, magny-core and small nehalem systems would want this enabled, systems with slow interconnects would not. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 7b4c8c70b2d1..cf6053b226c3 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -67,6 +67,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ + | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -91,8 +92,8 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_BALANCE_WAKE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ -- cgit v1.2.3 From 78e7ed53c9f42f04f9401ada6f7047db60781676 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Sep 2009 13:16:51 +0200 Subject: sched: Tweak wake_idx When merging select_task_rq_fair() and sched_balance_self() we lost the use of wake_idx, restore that and set them to 0 to make wake balancing more aggressive. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index cf6053b226c3..47f3c51d5e27 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -62,11 +62,12 @@ void build_cpu_to_node_map(void); .busy_idx = 2, \ .idle_idx = 1, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ @@ -87,7 +88,7 @@ void build_cpu_to_node_map(void); .busy_idx = 3, \ .idle_idx = 2, \ .newidle_idx = 2, \ - .wake_idx = 1, \ + .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ -- cgit v1.2.3 From 0ec9fab3d186d9cbb00c0f694d4a260d07c198d9 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 15 Sep 2009 15:07:03 +0200 Subject: sched: Improve latencies and throughput Make the idle balancer more agressive, to improve a x264 encoding workload provided by Jason Garrett-Glaser: NEXT_BUDDY NO_LB_BIAS encoded 600 frames, 252.82 fps, 22096.60 kb/s encoded 600 frames, 250.69 fps, 22096.60 kb/s encoded 600 frames, 245.76 fps, 22096.60 kb/s NO_NEXT_BUDDY LB_BIAS encoded 600 frames, 344.44 fps, 22096.60 kb/s encoded 600 frames, 346.66 fps, 22096.60 kb/s encoded 600 frames, 352.59 fps, 22096.60 kb/s NO_NEXT_BUDDY NO_LB_BIAS encoded 600 frames, 425.75 fps, 22096.60 kb/s encoded 600 frames, 425.45 fps, 22096.60 kb/s encoded 600 frames, 422.49 fps, 22096.60 kb/s Peter pointed out that this is better done via newidle_idx, not via LB_BIAS, newidle balancing should look for where there is load _now_, not where there was load 2 ticks ago. Worst-case latencies are improved as well as no buddies means less vruntime spread. 
(as per prior lkml discussions) This change improves kbuild-peak parallelism as well. Reported-by: Jason Garrett-Glaser Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1253011667.9128.16.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 47f3c51d5e27..42f1673ec83f 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -61,7 +61,7 @@ void build_cpu_to_node_map(void); .cache_nice_tries = 2, \ .busy_idx = 2, \ .idle_idx = 1, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ @@ -87,10 +87,11 @@ void build_cpu_to_node_map(void); .cache_nice_tries = 2, \ .busy_idx = 3, \ .idle_idx = 2, \ - .newidle_idx = 2, \ + .newidle_idx = 0, \ .wake_idx = 0, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ | SD_BALANCE_WAKE \ -- cgit v1.2.3 From b8a543ea5a5896830a9969bacfd047f9d15940b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Sep 2009 15:22:03 +0200 Subject: sched: Reduce forkexec_idx If we're looking to place a new task, we might as well find the idlest position _now_, not 1 tick ago. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 42f1673ec83f..569b9dafc78c 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -63,7 +63,7 @@ void build_cpu_to_node_map(void); .idle_idx = 1, \ .newidle_idx = 0, \ .wake_idx = 0, \ - .forkexec_idx = 1, \ + .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ @@ -89,7 +89,7 @@ void build_cpu_to_node_map(void); .idle_idx = 2, \ .newidle_idx = 0, \ .wake_idx = 0, \ - .forkexec_idx = 1, \ + .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ -- cgit v1.2.3 From 9b6b93998a0d9c44d6701be5b472f3302c3a8596 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 25 Aug 2009 20:54:10 +0200 Subject: [IA64] pci_br, fix infinite loop in find_free_ate() When * there is almost out of ates * one asks for more than one ate * there are some available at the end of ate array then the inner for loop will end without incrementing 'index'. This means the outer loop will start at the same point finding it's available and runs the inner loop again from the same index. This repeats forever. Hence make sure we check we were at the end of ate array and return an error in such case. 
Signed-off-by: Jiri Slaby Cc: Fenghua Yu Found-by: Jeff Mahoney Signed-off-by: Tony Luck --- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 239b3cedcf2b..5bc34eac9e01 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -54,6 +54,8 @@ static int find_free_ate(struct ate_resource *ate_resource, int start, break; } } + if (i >= ate_resource->num_ate) + return -1; } else index++; /* Try next ate */ } -- cgit v1.2.3 From f172468a142c6989144ccb827c1b57c69229e8ba Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Fri, 31 Jul 2009 16:57:51 -0400 Subject: [IA64] Use .ref.text, not .text.init for start_ap. It seems that start_ap doesn't need to be in a special location in the kernel, but it references some init code so it should be in .ref.text. Since this is the only thing in the .text.head section, eliminate .text.head from the linker script. 
Signed-off-by: Tim Abbott Signed-off-by: Tony Luck --- arch/ia64/kernel/head.S | 2 +- arch/ia64/kernel/vmlinux.lds.S | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index e1f97ac9eefd..12305d3d4838 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -181,7 +181,7 @@ swapper_pg_dir: halt_msg: stringz "Halting kernel\n" - .section .text.head,"ax" + __REF .global start_ap diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86b9ac2..fa3a558e3d76 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -51,8 +51,6 @@ SECTIONS KPROBES_TEXT *(.gnu.linkonce.t*) } - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) - { *(.text.head) } .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { *(.text2) } #ifdef CONFIG_SMP -- cgit v1.2.3 From ed7af3e63bd2458d5138c4b7e92fe4e1cdc97d9d Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Fri, 31 Jul 2009 16:57:52 -0400 Subject: [IA64] Use standard macros for page-aligned data. 
Signed-off-by: Nelson Elhage Signed-off-by: Tony Luck --- arch/ia64/kernel/head.S | 2 +- arch/ia64/kernel/vmlinux.lds.S | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 12305d3d4838..1a6e44515eb4 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -167,7 +167,7 @@ RestRR: \ mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ mov rr[_tmp1]=_tmp2 - .section __special_page_section,"ax" + __PAGE_ALIGNED_DATA .global empty_zero_page empty_zero_page: diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index fa3a558e3d76..b484b86da830 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -219,7 +219,9 @@ SECTIONS { *(.data.init_task) } .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) - { *(__special_page_section) + { + PAGE_ALIGNED_DATA(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); __start_gate_section = .; *(.data.gate) __stop_gate_section = .; -- cgit v1.2.3 From 6ae86350857bf3e862f8dcd10039ccb45e056f85 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Fri, 31 Jul 2009 16:57:53 -0400 Subject: [IA64] Clean up linker script using standard macros. Aside from using fewer output sections and moving some data around, the main side effect of this change is changing the alignment of some sections. In particular: * cachline-aligned and read_mostly data are now aligned to SMP_CACHE_BYTES. (Previously, they were laid out consecutively after a PAGE_SIZE alignment) * .init.ramfs is now page-aligned, per the INIT_RAM_FS macro. (Previously it had no explicit alignment). 
Signed-off-by: Nelson Elhage Signed-off-by: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 109 +++++------------------------------------ 1 file changed, 11 insertions(+), 98 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index b484b86da830..f69f411516d5 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -64,14 +64,7 @@ SECTIONS NOTES :code :note /* put .notes in text and mark in PT_NOTE */ code_continues : {} :code /* switch back to regular program... */ - /* Exception table */ - . = ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) - { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(16) /* MCA table */ . = ALIGN(16); @@ -113,38 +106,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) - { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) - { INIT_DATA } - -#ifdef CONFIG_BLK_DEV_INITRD - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) - { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) - { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) - { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) { @@ -202,22 +166,9 @@ SECTIONS } #endif - . = ALIGN(8); - __con_initcall_start = .; - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) - { *(.con_initcall.init) } - __con_initcall_end = .; - __security_initcall_start = .; - .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) - { *(.security_initcall.init) } - __security_initcall_end = .; . 
= ALIGN(PAGE_SIZE); __init_end = .; - /* The initial task and kernel stack */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) - { *(.data.init_task) } - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { PAGE_ALIGNED_DATA(PAGE_SIZE) @@ -236,12 +187,6 @@ SECTIONS * kernel data */ - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) - { *(.data.read_mostly) } - - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) - { *(.data.cacheline_aligned) } - /* Per-cpu data: */ . = ALIGN(PERCPU_PAGE_SIZE); PERCPU_VADDR(PERCPU_ADDR, :percpu) @@ -258,6 +203,9 @@ SECTIONS __cpu0_per_cpu = .; . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ #endif + INIT_TASK_DATA(PAGE_SIZE) + CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) + READ_MOSTLY_DATA(SMP_CACHE_BYTES) DATA_DATA *(.data1) *(.gnu.linkonce.d*) @@ -274,49 +222,14 @@ SECTIONS .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { *(.sdata) *(.sdata1) *(.srdata) } _edata = .; - __bss_start = .; - .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) - { *(.sbss) *(.scommon) } - .bss : AT(ADDR(.bss) - LOAD_OFFSET) - { *(.bss) *(COMMON) } - __bss_stop = .; + + BSS_SECTION(0, 0, 0) _end = .; code : { } :code - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. 
*/ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } + + STABS_DEBUG + DWARF_DEBUG /DISCARD/ : { *(.note) } } -- cgit v1.2.3 From 182a85f8a119c789610a9d464f4129ded9f3c107 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Sep 2009 13:24:49 +0200 Subject: sched: Disable wakeup balancing Sysbench thinks SD_BALANCE_WAKE is too agressive and kbuild doesn't really mind too much, SD_BALANCE_NEWIDLE picks up most of the slack. 
On a dual socket, quad core, dual thread nehalem system: sysbench (--num_threads=16): SD_BALANCE_WAKE-: 13982 tx/s SD_BALANCE_WAKE+: 15688 tx/s kbuild (-j16): SD_BALANCE_WAKE-: 47.648295846 seconds time elapsed ( +- 0.312% ) SD_BALANCE_WAKE+: 47.608607360 seconds time elapsed ( +- 0.026% ) (same within noise) Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 569b9dafc78c..d0141fbf51d0 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -68,7 +68,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -94,7 +93,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_BALANCE_WAKE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ -- cgit v1.2.3 From f4c3f03838ae47a92f2d15d48ddf68deae5d7ebb Mon Sep 17 00:00:00 2001 From: Anirban Sinha Date: Wed, 16 Sep 2009 10:16:18 -0700 Subject: Fix ia64 build breakage in head.S The "cleanup console_print()" patch in commit 353f6dd2dec992ddd34620a94b051b0f76227379 introduced an "extern" declaration into an assembly language file. Remove it. 
Signed-off-by: Anirban Sinha Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/kernel/head.S | 1 - arch/ia64/kernel/head.h | 1 - 2 files changed, 2 deletions(-) delete mode 100644 arch/ia64/kernel/head.h (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index e6c5c3d5e1f8..23f846de62d5 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -34,7 +34,6 @@ #include #include #include -#include "head.h" #ifdef CONFIG_HOTPLUG_CPU #define SAL_PSR_BITS_TO_SET \ diff --git a/arch/ia64/kernel/head.h b/arch/ia64/kernel/head.h deleted file mode 100644 index 2e2ac6824e65..000000000000 --- a/arch/ia64/kernel/head.h +++ /dev/null @@ -1 +0,0 @@ -extern void console_print(const char *s); -- cgit v1.2.3