From a9415644583ef344e02f84faf5fe24bfadb2af8e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Jan 2006 12:17:48 -0800 Subject: [PATCH] capable/capability.h (arch/) arch: Use where capable() is used. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel/mce.c') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 183dc6105429..ee5f65c44214 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 6e3f361781573a27296c77a79c9892dec8f2e36c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 11 Jan 2006 22:42:14 +0100 Subject: [PATCH] x86_64: make trap information available to die notification handlers This adjusts things so that handlers of the die() notifier will have sufficient information about the trap currently being handled. It also adjusts the notify_die() prototype to (again) match that of i386. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel/mce.c') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index ee5f65c44214..63777b8cb8c1 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -169,7 +169,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) int panicm_found = 0; if (regs) - notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL); + notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); if (!banks) return; -- cgit v1.2.3 From 73ca5358aab55e2e29993a31f3776c54c05ae729 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 11 Jan 2006 22:43:06 +0100 Subject: [PATCH] x86_64: increase MCE bank counts There is one CPU here whose MCE bank count is 6. This patch increases x86_64's MCE bank count. Signed-off-by: Shaohua Li Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86_64/kernel/mce.c') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 63777b8cb8c1..281ad5f1f86a 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -26,7 +26,7 @@ #include #define MISC_MCELOG_MINOR 227 -#define NR_BANKS 5 +#define NR_BANKS 6 static int mce_dont_init; @@ -574,6 +574,10 @@ ACCESSOR(bank1ctl,bank[1],mce_restart()) ACCESSOR(bank2ctl,bank[2],mce_restart()) ACCESSOR(bank3ctl,bank[3],mce_restart()) ACCESSOR(bank4ctl,bank[4],mce_restart()) +ACCESSOR(bank5ctl,bank[5],mce_restart()) +static struct sysdev_attribute * bank_attributes[NR_BANKS] = { + &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, + &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl}; ACCESSOR(tolerant,tolerant,) ACCESSOR(check_interval,check_interval,mce_restart()) @@ -581,6 +585,7 @@ ACCESSOR(check_interval,check_interval,mce_restart()) static __cpuinit int mce_create_device(unsigned int cpu) { int err; + int i; if (!mce_available(&cpu_data[cpu])) return -EIO; @@ -590,11 +595,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) err = sysdev_register(&per_cpu(device_mce,cpu)); if (!err) { - sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); - sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); - sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); - sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); - sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); + for (i = 0; i < banks; i++) + sysdev_create_file(&per_cpu(device_mce,cpu), + bank_attributes[i]); sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); } @@ -604,11 +607,11 @@ static __cpuinit int mce_create_device(unsigned int cpu) #ifdef CONFIG_HOTPLUG_CPU static __cpuinit void mce_remove_device(unsigned int cpu) { - sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); - sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); - sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); - sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); - sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); + int i; + + for (i = 0; i < banks; i++) + sysdev_remove_file(&per_cpu(device_mce,cpu), + bank_attributes[i]); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); -- cgit v1.2.3 From 4855170f9876c8b4a16f115f32cf6851bac1ffcc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Jan 2006 22:44:48 +0100 Subject: [PATCH] x86_64: Make it clear in machine checks that it's an hardware problem Hopefully the users will take the hint. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86_64/kernel/mce.c') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 281ad5f1f86a..b8f28ebdce26 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -92,6 +92,7 @@ void mce_log(struct mce *mce) static void print_mce(struct mce *m) { printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->cpu, m->mcgstatus, m->bank, m->status); @@ -110,6 +111,9 @@ static void print_mce(struct mce *m) if (m->misc) printk("MISC %Lx ", m->misc); printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } static void mce_panic(char *msg, struct mce *backup, unsigned long start) -- cgit v1.2.3 From 0a9c3ee7692fa20670986bcf550950e88ab9b4cc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Jan 2006 22:46:54 +0100 Subject: [PATCH] x86_64: Use safe_smp_processor_id in MCE handler hard_smp_processor_id would return the local APIC id instead of the Linux processor id. On big systems they are often not identical. safe_smp_processor_id is just a wrapper around it that does the necessary conversions. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel/mce.c') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index b8f28ebdce26..13a2eada6c95 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -24,6 +24,7 @@ #include #include #include +#include #define MISC_MCELOG_MINOR 227 #define NR_BANKS 6 @@ -178,7 +179,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) return; memset(&m, 0, sizeof(struct mce)); - m.cpu = hard_smp_processor_id(); + m.cpu = safe_smp_processor_id(); rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); if (!(m.mcgstatus & MCG_STATUS_RIPV)) kill_it = 1; -- cgit v1.2.3